From 4cdd1d271c23e10e22c77f274f9e57f65b11a9c7 Mon Sep 17 00:00:00 2001 From: Simon-Christian Klein Date: Fri, 5 Oct 2018 11:12:58 +0200 Subject: [PATCH 1/2] WIP: Python backend. First commit. Only the SPRK33 TI is available, NO DC, NO ARTIFICIAL DISSIPATION --- python/InductionEq.py | 435 ++++++++++++++++++++++++++++ python/cl.py | 57 ++++ python/compute_error_rotation_2d.py | 165 +++++++++++ 3 files changed, 657 insertions(+) create mode 100644 python/InductionEq.py create mode 100644 python/cl.py create mode 100644 python/compute_error_rotation_2d.py diff --git a/python/InductionEq.py b/python/InductionEq.py new file mode 100644 index 0000000..f471f38 --- /dev/null +++ b/python/InductionEq.py @@ -0,0 +1,435 @@ +import math +import numpy as np +#import pyopencl as cl +import cl + +# from +InductionEq/preparse_vars.m +def preparse_vars(): + global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps, I_Results + I_Mesh = {'NODES_X':0, 'NODES_Y':0, 'NODES_Z':0, 'DX':0, 'DY':0, 'DZ':0, 'XMIN':0, 'XMAX':0, 'YMAX':0, 'YMIN':0, 'ZMIN':0, 'ZMAX':0} + I_TI = {'cfl':0, 'final_time':0, 'time_integrator':'\0', 'DT':0, 'num_steps':0} + I_IEq = {'form_uibj':0, 'form_source':0, 'form_ujbi':0, 'dissipation':'NONE', 'dissipation_form':'\0', 'HO_DISSIPATION_FACTOR':1, 'hall_term':'NONE', 'g_range':0, 'l_range':0, 'MP2MIN':0, 'MP2MAX':0, 'CMIN':0, 'CMAX':0} + I_DC = {'use':0, 'form':'\0', 'BNODES':0, 'error_threshold':0, 'g_range':0, 'l_range':0,'max_iterations':0} + I_Tech = {'device':0, 'REAL':'\0','num_nodes_pad':0, 'num_groups':0,'W_SIZE':0, 'g_range':0, 'l_range':0, 'optimizations':'\0', 'context' : None} + I_RunOps = {'order':0, 'operator_form':'classical', 'testcase':'\0', 'variable_u':False, 'periodic':'NONE', 'plot_energy':0, 'plot_numerical_solution':0,'plot_analytical_solution':0, 'plot_difference':0, 'plot_divergence':0, 'save_fields':False, 'save_integrals_over_time':False} + I_Results = {'abs_err':0, 'rel_err':0, 'field_b':0, 'field_b_ana':0, 'energy':0, 'divergence_norm':0, 'runtime':0, 'kernel_runtime':0, 'energy_over_time':0, 'L2error_B_over_time':0, 'L2error_divB_over_time':0, 'time':0 } + +# from generate_settings.m + +def generate_settings(map, keys): + # Atomatically generates a formatted settings string that contains all + # compiler optimizations and OpenCL defines of map with the given keys where + # keys is a cell array, e.g. settings_mesh = generate_settings(I_Mesh, {'DX'; 'DY'; 'DZ'} + + settings = ''; + for key in keys: + if key in map: + if not(type(map[key]) is float or type(map[key]) is int): + if key== 'optimizations': + settings = settings + map[key] + elif 'USE' in map[key]: + settings = settings + ' -D' + map[key] + '=1' + else: + settings = settings + ' -D' + key + '=' + str(map[key]) + else: + settings = settings + ' -D' + key + "=" + str(map[key]) + else: + print('Unknown identifier ' + keys[i] + '\n') + return settings + + + + +# from +InductionEq/initialize.m +def initialize(): + global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps + + +# *NEW* generate opencl context with PyOpenCl + cl.create_ctx() + +# a python float is a 64bit float => float means double + if I_RunOps['periodic'] == 'USE_PERIODIC': + DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / float(I_Mesh['NODES_X']); + DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / float(I_Mesh['NODES_Y']); + DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / float(I_Mesh['NODES_Z']); + else: + DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / (float(I_Mesh['NODES_X'])-1); + DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / (float(I_Mesh['NODES_Y'])-1); + DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / (float(I_Mesh['NODES_Z'])-1); + + + I_Mesh['DX'] = DX; I_Mesh['DY'] = DY; I_Mesh['DZ'] = DZ; + num_nodes = I_Mesh['NODES_X']*I_Mesh['NODES_Y']*I_Mesh['NODES_Z']; + +# later ... +# type = dev_type(I_Tech('device')); +# if (strcmp(type{1},'CPU')) +# group_size = 16; +# else +# if (num_nodes > lw_size(I_Tech('device'))) +# group_size = lw_size(I_Tech('device')); +# else +# group_size = 2^floor(log(num_nodes) / log(2)); +# end +# end + +# FOR MY GPU + group_size = 1024 + + group_size = float(group_size); + num_nodes_pad = math.ceil(float(num_nodes)/group_size)*group_size + num_groups = math.ceil(num_nodes_pad/group_size) + + I_Tech['num_nodes_pad'] = int(num_nodes_pad) + + I_Tech['num_groups'] = int(num_groups); + I_Tech['W_SIZE'] = int(group_size); + + +# Initialise fields: magnetic, veclocity, and charge density + if I_Tech['REAL'] == 'float': + field_u_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) + field_b_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) + field_rho_init = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)+1 + else: + field_u_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64) + field_b_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64) + field_rho_init = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64)+1 + + +# Generate settings needed for computation of the initial state for the +# specified testcase. + settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'optimizations']); + settings_mesh = generate_settings(I_Mesh, ['DX', 'DY', 'DZ', 'NODES_X', 'NODES_Y', 'NODES_Z','XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX']) + settings = settings_tech + settings_mesh + + + kernel_path_list = [] + kernel_path_list.append('../include/'+I_RunOps['testcase']+'.h') + kernel_path_list.append('../include/utils.h') + kernel_path_list.append('../kernel/kernel_init.cl') + + cl.compile_kernels(kernel_path_list, settings) + + # Set global and local range for calculation of the inital state + # The global and local range can either be a vector of type uint32 or + # double. Double is supported due to the fact that by default Matlab + # stores all variables as double. + I_IEq['g_range'] = np.array([I_Mesh['NODES_X'], I_Mesh ['NODES_Y'], I_Mesh['NODES_Z']], dtype=np.int32) + I_IEq['l_range'] = np.array([0], dtype=np.int32) + + # Compute initial state for magnetic field, velocity field and density + # field + cl.run_kernel('init_b', I_IEq['g_range'], I_IEq['l_range'], field_b_init) + cl.run_kernel('init_u', I_IEq['g_range'], I_IEq['l_range'], field_u_init) + cl.run_kernel('init_rho', I_IEq['g_range'], I_IEq['l_range'], field_rho_init) + + # Compute time step from CFL number, min step size and maximal velocity + u_max = max(np.sqrt(np.power(field_u_init[1,:], 2) + np.power(field_u_init[2,:],2) + np.power(field_u_init[3,:],2))) + dt = I_TI['cfl'] * min([DX, DY, DZ]) / float(u_max) + num_steps = math.ceil(I_TI['final_time']/dt) + dt = I_TI['final_time'] / num_steps + + I_TI['DT'] = dt + I_TI['num_steps'] = num_steps + + # Global and local range for dot product and norm + I_Tech['g_range'] = np.array([I_Tech['num_nodes_pad'], 1, 1], dtype=np.uint32) + I_Tech['l_range'] = np.array([I_Tech['W_SIZE'], 1, 1], dtype=np.uint32) + + # Depending on the discretization of the laplace operator for + # divergence cleaning set the number of boundary nodes + if I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_LNS': + I_DC['BNODES'] = 0; + elif (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_DIRICHLET') or (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_NARROW_STENCIL_DIRICHLET'): + I_DC['BNODES'] = 1; + else: + print('ERROR: Option I_DC(''divergence_cleaning_form'') = '+str(I_DC['divergence_cleaning_form']) + ' unknown.') + + # Set global and local range for divergence cleaning + I_DC['g_range'] = np.array([I_Mesh['NODES_X']-2*I_DC['BNODES'], I_Mesh['NODES_Y']-2*I_DC['BNODES'], I_Mesh['NODES_Z']-2*I_DC['BNODES']], dtype=np.int32) + I_DC['l_range'] = np.array([0], dtype=np.int32) + + return field_b_init, field_u_init, field_rho_init + +def compute_numerical_solution_setup(field_b, field_u, field_rho): + global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps + + # Memory allocation for fields + if I_Tech['REAL'] == 'float': + # Initialize auxillary fields needed for time stepping + field_b2 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) + field_b3 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) + field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32) + field_curlB_rho = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) + current_time = np.zeros(2, dtype=np.float32) # contains two elements: time at the beginning of a step and of a stage + norm2_output = np.zeros((1, I_Tech['num_groups']), dtype=np.float32) + # Allocate memory for field used for divergence cleaning + if I_DC['divergence_cleaning']: + #Field for divergence of b-field and phi + field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32) + field_phi = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32) + #Fields needed for CG + field_laplace = np.zeros((1, I_Tech['num_nodes_pad']) , dtype=np.float32); + field_r = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32); + field_u_div_cleaning = np.zeros((1, I_Tech['num_nodes_pad']), dtype = np.float32); + + energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32); + L2error_B = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32) + L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32); + else: + # Initialize auxillary fields needed for time stepping + field_b2 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64) + field_b3 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64) + field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64) + field_curlB_rho = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64) + current_time = np.zeros(2, dtype=np.float64) # contains two elements: time at the beginning of a step and of a stage + norm2_output = np.zeros((1, I_Tech['num_groups']), dtype=np.float64) + # Allocate memory for field used for divergence cleaning + if I_DC['divergence_cleaning']: + #Field for divergence of b-field and phi + field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64) + field_phi = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64) + #Fields needed for CG + field_laplace = np.zeros((1, I_Tech['num_nodes_pad']) , dtype=np.float64) + field_r = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64); + field_u_div_cleaning = np.zeros((1, I_Tech['num_nodes_pad']), dtype =np.float64) + + energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64); + L2error_B = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64) + L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64); + + kernel_path_list = [] + +# Include header file containing the coefficients of the respective order + if I_RunOps['order'] == 2: + if I_RunOps['operator_form'] == 'extended': + kernel_path_list.append('../include/2ndOrderExtended.h') + else: # classical operators + kernel_path_list.append('../include/2ndOrder.h') + + elif I_RunOps['order'] == 4: + if I_RunOps['operator_form'] == 'extended': + kernel_path_list.append('../include/4thOrderExtended.h') + else: # classical operators + kernel_path_list.append('../include/4thOrder.h') + + elif I_RunOps['order'] == 6: + if I_RunOps['operator_form'] == 'extended': + kernel_path_list.append('../include/6thOrderExtended.h') + else: # classical operators + kernel_path_list.append('../include/6thOrder.h') + + else: + print('Specify order \n') + + kernel_path_list.append('../include/utils.h') + kernel_path_list.append('../include/' + I_RunOps['testcase'] + '.h') + kernel_path_list.append('../kernel/kernel_init.cl') + + kernel_path_list.append('../kernel/SBP_operator.cl') + + kernel_path_list.append('../kernel/kernel_operator.cl') + + # Include files for divergence cleaning if desired + if I_DC['divergence_cleaning']: + pass + #Prepare temporary fields for divergence cleaning +# DC_fields = struct('divB',field_divB,'phi',field_phi,'laplace', field_laplace,'r', field_r,'u', field_u_div_cleaning,'output', norm2_output); +# kernel_path_list = [kernel_path_list, {'../kernel/kernel_div_cleaning.cl'}]; +# kernel_path_list = [kernel_path_list, {'../kernel/kernel_CG.cl'}]; +# settings_dc = generate_settings(I_DC, {'BNODES', 'divergence_cleaning_form'}); + else: + DC_fields = 0 + settings_dc ='' + +# Include files for artificial dissipation if desired + if I_IEq['dissipation'] == 'USE_ARTIFICIAL_DISSIPATION': + pass +# kernel_path_list = [kernel_path_list, {'../include/artificial_dissipation.h'}]; +# kernel_path_list = [kernel_path_list, {'../kernel/artificial_dissipation.cl'}]; +# settings_dissipation = generate_settings(I_IEq, {'dissipation', 'dissipation_form', 'HO_DISSIPATION_FACTOR'}); + + if I_IEq['dissipation_form'] == 'USE_ADAPTIVE_DISSIPATION': + pass +# settings_dissipation = strcat(settings_dissipation, generate_settings(I_IEq, {'MP2MIN'; 'MP2MAX'; 'CMIN'; 'CMAX'})); + else: + settings_dissipation = '' + + kernel_path_list.append('../kernel/induction_Eq_volume.cl') + kernel_path_list.append('../kernel/induction_Eq_surface.cl') + kernel_path_list.append('../kernel/kernel_dot_product.cl') + kernel_path_list.append('../kernel/kernel_time_integrator.cl') + +# Specify compile settings + settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'W_SIZE', 'optimizations']) + + settings_mesh = generate_settings(I_Mesh, ['DX', 'DY', 'DZ','NODES_X', 'NODES_Y', 'NODES_Z', 'XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX']) + + settings_induction_eq = generate_settings(I_IEq, ['form_uibj', 'form_source', 'form_ujbi', 'hall_term']) + + settings_time_integration = generate_settings(I_TI, ['DT']) + + settings_runops = generate_settings(I_RunOps, ['periodic']) + + + settings = settings_induction_eq + settings_time_integration + settings_tech + settings_mesh + settings_dissipation + settings_dc + settings_runops + + + if I_TI['time_integrator'] == 'SSPRK33': + # 'SSPRK33_1', 'SSPRK33_2a', 'SSPRK33_2b', 'SSPRK33_3a', 'SSPRK33_3b'}; + # calc_curlB_rho = {'calc_curlB_rho_1_3args', 'calc_curlB_rho_2_3args', 'calc_curlB_rho_3_3args'}; + RK_Step = []; + if I_IEq['hall_term'] == 'USE_HALL': + RK_Step.append('calc_curlB_rho_1_3args') + if I_RunOps['variable_u']: + RK_Step.append('calc_u_3_args') + RK_Step.append('SSPRK33_1') + + RK_Step.append('SSPRK33_2a') + if I_IEq['hall_term']== 'USE_HALL': + RK_Step.append('calc_curlB_rho_2_3args') + if I_RunOps['variable_u']: + RK_Step.append('calc_u_3_args') + RK_Step.append('SSPRK33_2b') + + RK_Step.append('SSPRK33_3a') + if I_IEq['hall_term'] == 'USE_HALL': + RK_Step.append('calc_curlB_rho_3_3args') + if I_RunOps['variable_u']: + RK_Step.append('calc_u_3_args') + RK_Step.append('SSPRK33_3b') + + RK_Step.append('calc_time_3_args') + + time_integrator_num_fields = 3; + +# switch time_integrator_num_fields + if time_integrator_num_fields == 2: + IEq_fields = {'field_b':field_b, \ + 'field_b2': field_b2, \ + 'field_curlB_rho': field_curlB_rho, \ + 'field_u': field_u, \ + 'field_rho':field_rho, \ + 'current_time':current_time, \ + 'field_divB': field_divB, \ + 'norm2_output':norm2_output, \ + 'energy': energy, \ + 'L2error_B': L2error_B, \ + 'L2error_divB': L2error_divB} + elif time_integrator_num_fields == 3: + IEq_fields = {'field_b': field_b, \ + 'field_b2': field_b2, \ + 'field_b3': field_b3, \ + 'field_curlB_rho': field_curlB_rho, \ + 'field_u': field_u, \ + 'field_rho': field_rho, \ + 'current_time': current_time, \ + 'field_divB': field_divB, \ + 'norm2_output': norm2_output,\ + 'energy': energy,\ + 'L2error_B': L2error_B, \ + 'L2error_divB': L2error_divB} + else: + print('Wrong value of time_integrator_num_fields =' + str( time_integrator_num_fields)) + + + return kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step + +def compute_numerical_solution(field_b, field_u, field_rho): + global I_mesh, I_TI, I_IEq, I_Tech, I_RunOps, I_Results + +# setup fields, settings etc. + (kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step) = compute_numerical_solution_setup(field_b, field_u, field_rho) + +# Initialise logging variables + kernel_runtime = 0; + RK_block_size = 15000; + if I_RunOps['save_integrals_over_time']: + energy = IEq_fields['energy'] + L2error_B = IEq_fields['L2error_B'] + L2error_divB = IEq_fields['L2error_divB'] + + norm2_output = IEq_fields['norm2_output'] + +# Compile kernel + cl.compile_kernels(kernel_path_list, settings); + +# Switch case for time integrator + + if time_integrator_num_fields == 2: + print("time_integrator_num_fields = 2 not ready yet \n") + elif time_integrator_num_fields == 3: + field_b = IEq_fields['field_b'] + field_b2 = IEq_fields['field_b2'] + field_b3 = IEq_fields['field_b3'] + field_curlB_rho = IEq_fields['field_curlB_rho'] + field_u = IEq_fields['field_u'] + field_rho = IEq_fields['field_rho'] + current_time = IEq_fields['current_time'] + field_divB = IEq_fields['field_divB'] + + if False: #other versions + pass + else: + num_steps_run = I_TI['num_steps']; + while num_steps_run > RK_block_size: + kernel_list = RK_Step * RK_block_size + t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\ + field_b, field_b2, field_b3, field_curlB_rho,\ + field_u, field_rho, current_time) + kernel_runtime = kernel_runtime + t; + num_steps_run = num_steps_run - RK_block_size; + + if num_steps_run > 0: + kernel_list = RK_Step *num_steps_run + t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\ + field_b, field_b2, field_b3, field_curlB_rho,\ + field_u, field_rho, current_time); + kernel_runtime = kernel_runtime + t; + + else: + print('Wrong value of time_integrator_num_fields = ' + str(time_integrator_num_fields) + ' \n') + #runtime = toc; + runtime = 0 + print('Elapsed time is' + str(runtime) + 'seconds.\n') + + # Save total runtime and kernel runtime + I_Results['runtime'] = runtime; + I_Results['kernel_runtime'] = kernel_runtime; + + # save fields + if I_RunOps['save_fields']: + I_Results['field_b'] = field_b; + + field_b_ana = IEq_fields['field_b2'] + + #Calculate analytical solution and error + current_time[1] = I_TI['final_time'] + cl.run_kernel('analytical_b', I_IEq['g_range'], I_IEq['l_range'], field_b_ana, current_time); + + norm2_output[:] = 0; + + cl.run_kernel('norm2_diff', I_Tech['g_range'], I_Tech['l_range'], field_b, field_b_ana, norm2_output); + I_Results['abs_err'] = math.sqrt(sum(sum(norm2_output))); + + + norm2_output[:] = 0; + cl.run_kernel( 'norm2', I_Tech['g_range'], I_Tech['l_range'], field_b_ana, norm2_output); + I_Results['rel_err'] = I_Results['abs_err'] / math.sqrt(norm2_output.sum()); + + + #Calculate divergence + cl.run_kernel('calc_div', I_IEq['g_range'], I_IEq['l_range'], field_b, field_divB); + norm2_output[:] = 0; + cl.run_kernel('norm2_S', I_Tech['g_range'], I_Tech['l_range'], field_divB, norm2_output); + I_Results['divergence_norm'] = math.sqrt(norm2_output.sum()); + + + #Calculate energy + norm2_output[:] = 0; + cl.run_kernel('norm2', I_Tech['g_range'], I_Tech['l_range'], field_b, norm2_output); + I_Results['energy'] = norm2_output.sum(); + + diff --git a/python/cl.py b/python/cl.py new file mode 100644 index 0000000..cc177f5 --- /dev/null +++ b/python/cl.py @@ -0,0 +1,57 @@ +import pyopencl as cl + +def compile_kernels(src_path, cmd_line, *args): + global program, ctx + src = ' ' + for entry in src_path: + file = open(entry, 'r') + src = src + file.read() + file.close() + prg = cl.Program(ctx, src).build(options=cmd_line) + program = prg + return prg + +def create_ctx(): + global ctx, queue, PYOPENCL_COMPILER_OUTPUT + PYOPENCL_COMPILER_OUTPUT=1 + ctx = cl.create_some_context() + queue = cl.CommandQueue(ctx) + return ctx, queue + +def run_kernel(names, g_range, l_range, *arguments): + global queue, program + inputbuffers = [] + for arg in arguments: + inputbuffers.append(cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = arg)) + callstrings = [] + if type(names) is list: + for name in names: + callstrings.append('program.' + name + '(queue, tuple(g_range) , None, *inputbuffers)') + else: + callstrings.append('program.' + names + '(queue, tuple(g_range), None, *inputbuffers)') + + for callstring in callstrings: + event = eval(callstring) +# event.wait() + i = 0 + for arg in arguments: + cl.enqueue_copy(queue, arg, inputbuffers[i]) +# inputbuffers[i].release() + i = i +1 + return 0 + +def run_kernel_ref(name, g_range, l_range, *arg_refs): + global queue, program + callstring = 'program.' + name + '(queue, None, None, *inputbuffers)' + eval(callstring) + return + +def allocate_buf(array): + return cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = array) + +def malloc(size): + return cl.Buffer(ctx, size=size) + + + + diff --git a/python/compute_error_rotation_2d.py b/python/compute_error_rotation_2d.py new file mode 100644 index 0000000..e5cd9ed --- /dev/null +++ b/python/compute_error_rotation_2d.py @@ -0,0 +1,165 @@ +# -*- coding: -*- + +#This project is licensed under the terms of the Creative Commons CC BY-NC-ND 3.0 license. + +#Use this script to specify the general conditions and the testcase you +#want to investigate. It calls functions to initializes all variables and fields relevant for +#the testcase and advance the numerical solution in time. Depending on the +#properties that shall be investigated additional functionalities can be +#enabled, e.g. divergence cleaning. +import InductionEq as induction_eq +from math import pi +#prepare_vars() initializes containers (maps) in which all variables (label, value) are +#stored. Variables are grouped together by purpose: +#I_Mesh: Contains all variables concerning the discrete mesh, e.g. +#stepsize, box size +#I_TI: Contains all variables concerning time integration, e.g. time +#integrator, timestep +#I_IEq: Contains all variables relevant for the induction equation, e.g. +#discretization form, switch for additional artificial dissipation +#I_DC: Contains all variables concerning divergence cleaning, e.g. +#discretization, error threshold +#I_Tech: Contains all variables of technical nature, e.g. the device, +#computation precision +#I_RunOps: Specifies the parameters of a computation, e.g. which variables +#will be saved, the testcase + +#Variables in capital letters are program specific defines which are set by +#openCL compile settings. If the value is a string in captial letters +#beginning with USE it acts as a switch, e.g. to enable artificial dissipation +#or to switch between different discretizations. In case the key is written +#in capital letters the program define will be set to the corresponding +#value of the key. +induction_eq.preparse_vars(); + + +#global I_Mesh I_TI I_IEq I_DC I_Tech I_RunOps I_Results + +I_Mesh = induction_eq.I_Mesh +I_TI = induction_eq.I_TI +I_IEq = induction_eq.I_IEq +I_DC = induction_eq.I_DC +I_Tech = induction_eq.I_Tech +I_RunOps = induction_eq.I_RunOps +I_Results = induction_eq.I_Results + +# Mesh related parameters. The minimum number of nodes per direction is +# constrained by the number of boundary nodes, e.g. the 4th order method +# has 2*4 boundary nodes which means the minimum number of nodes amounts to 8. + +N = 40 +I_Mesh['NODES_X'],I_Mesh['NODES_Y'],I_Mesh['NODES_Z'] = (N, N, 12) +I_Mesh['XMIN'], I_Mesh['XMAX'] = (-1.0, 1.0) +I_Mesh['YMIN'], I_Mesh['YMAX'] = (-1.0, 1.0) +I_Mesh['ZMIN'], I_Mesh['ZMAX'] = (-1.0, 1.0) + +# Time integration related variables +I_TI['cfl'] = 0.95 #Define the Courant–Friedrichs–Lewy condition +I_TI['final_time'] = 2*pi +#Chose the time integrator. Below is a list of up to date available +#options: +# SSPRK33, SSPRK104, +# KennedyCarpenterLewis2R54C, CalvoFrancoRandez2R64, +# CarpenterKennedy2N54, ToulorgeDesmet2N84F +I_TI['time_integrator'] = 'SSPRK33' #'KennedyCarpenterLewis2R54C'; + + +# Induction equation related variables +#Specify how the three part of the linear induction equation shall be +#computed. +I_IEq['form_uibj'] = 'USE_UIBJ_PRODUCT'; # PRODUCT, SPLIT, CENTRAL +I_IEq['form_source'] = 'USE_SOURCE_CENTRAL'; # CENTRAL, SPLIT, ZERO +I_IEq['form_ujbi'] = 'USE_UJBI_CENTRAL'; # SPLIT, CENTRAL, PRODUCT +#Enable or disable Hall-Term +I_IEq['hall_term'] = 'NONE'; # NONE, USE_HALL +#Enable or disable artificial dissipation +I_IEq['dissipation'] = 'NONE'; # NONE, USE_ARTIFICIAL_DISSIPATION +#Specify what kind of artifical dissipation shall be used + #USE_ADAPTIVE_DISSIPATION, USE_FIRST_ORDER_DISSIPATION, USE_HIGH_ORDER_DISSIPATION +I_IEq['dissipation_form'] = 'USE_HIGH_ORDER_DISSIPATION'; +I_IEq['HO_DISSIPATION_FACTOR'] = 1; # a constant (non-negative) factor adapting the influence of the high order dissipation +#Additional parameters needed for adaptive dissipation. For typical values +#see Svärd et al. (2009). +I_IEq['MP2MIN'] = 1 +I_IEq['MP2MAX'] = 10 +I_IEq['CMIN'] = 1 +I_IEq['CMAX'] = 10 + + +# Divergence cleaning related variables +#Enable divergence cleaning (True) or disable divergence cleaning (False) +I_DC['divergence_cleaning'] = False +#Choose how the laplace operator will be discretized +I_DC['divergence_cleaning_form'] = 'USE_LAPLACE_WIDE_STENCIL_LNS' +#USE_LAPLACE_WIDE_STENCIL_LNS USE_LAPLACE_WIDE_STENCIL_DIRICHLET USE_LAPLACE_NARROW_STENCIL_DIRICHLET +#Specify a threshold for the divergence norm +I_DC['absolute_error_threshold'] = 1e-3; +#The divergence cleaner will exit after max_iterations even if the error +#threshold is not reached +I_DC['max_iterations'] = 50; + + +# Technical details +# Specify the OpenCL device that will be used for computation +I_Tech['device'] = 1; + +# Switch between single floating point and double floating point precision +I_Tech['REAL'] = 'double' # float +I_Tech['REAL4'] = I_Tech['REAL'] + '4'; #Vector datatype + +#Compiler based optimizations +if I_Tech['REAL'] == 'float': + I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only -cl-single-precision-constant'; +else: + I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only'; + + +# Options relevant for a run +# Defines the order +I_RunOps['order'] = 4; # 2, 4, 6 +I_RunOps['operator_form'] = 'classical'; # 'classical' or 'extended' operators +# Specify the testcase. The name of the testcase has to be equal to the +# name of a header file which contains a function describing the initial state. +# Example testcases are: +# rotation_2D, rotation_3D, alfven_periodic_2D, hall_travelling_wave, hall_periodic +induction_eq.I_RunOps['testcase'] = 'rotation_2D'; +I_RunOps['variable_u'] = False; # must be set to True if a variable velocity is used +I_RunOps['periodic'] = 'NONE'; # 'NONE', 'USE_PERIODIC'; must be set to 'USE_PERIODIC' + # if periodic boundary conditions should be used +# Optional plotting parameters (2D plots). +# Choose the cross section with +# 'x', 'y', 'z' +# If you want to plot multiple cross sections use +# 'xy', 'xz', 'yz', 'xyz' +I_RunOps['plot_numerical_solution'] = ''; +I_RunOps['plot_analytical_solution'] = ''; +I_RunOps['plot_difference'] = ''; +I_RunOps['plot_divergence'] = ''; +#If set to 1 the magnetic field will be saved to I_Results('field_b') +I_RunOps['save_fields'] = False; +#If set to true the magnetic energy (L^2 norm) will be saved to I_Results('energy_over_time'), the +#L2 errors (if available) to I_Results('L2error_B_over_time') & I_Results('L2error_divB_over_time') +#and the time to I_Results('time') +I_RunOps['save_integrals_over_time'] = False; + +#Initialize the magnetic field, the velocity field and the density field +#according to the specified testcase. Also calculates and sets additional +#variables, e.g. stepsize, timestep, local work group size, etc... +(field_b_init, field_u_init, field_rho_init) = induction_eq.initialize(); + +print('Testcase: ' + I_RunOps['testcase'] +'\n') +print('Order:' +str(I_RunOps['order'])+ '\n') +print('Time integrator:' + I_TI['time_integrator'] + ' \n') +print('DT: '+ str(I_TI['DT']) + ' N_STEPS: ' + str(I_TI['num_steps']) + ' FINAL_TIME: '+str(I_TI['final_time'])+'\n') +print('DX: '+ str(I_Mesh['DX']) + ' NODES_X: '+str(I_Mesh['NODES_X']) +'\n') +print('DY: '+ str(I_Mesh['DY']) + ' NODES_Y: '+str(I_Mesh['NODES_Y'])+'\n') +print('DZ: '+ str(I_Mesh['DZ']) + ' NODES_Z: '+str(I_Mesh['NODES_Z'])+'\n') +print('Dissipation: '+I_IEq['dissipation']+' \n') +print('Divergence cleaning: '+str(I_DC['divergence_cleaning'])+' \n') +print('REAL:'+ I_Tech['REAL']+'\n') + +# Takes the initialized fields and advances the solution in time +induction_eq.compute_numerical_solution(field_b_init, field_u_init, field_rho_init); + +print('Divergence Norm: ' + str(I_Results['divergence_norm']) + ' Total Energy: '+str(I_Results['energy']) + '\n') +print('Relative Error:' + str(100*I_Results['rel_err']) + '%\n\n') From 2bf98977bee4cfaa99d111eaa8e6cda9318b6bac Mon Sep 17 00:00:00 2001 From: Simon-Christian Klein Date: Thu, 11 Oct 2018 11:17:49 +0200 Subject: [PATCH 2/2] changed the codingstyle and some lines as requested in PR #33. Added the timer and OpenCl device querry from the matlab version. found a bug from Fortran vs C array indexing --- .gitignore | 6 + python/InductionEq.py | 613 ++++++++++++++-------------- python/cl.py | 86 ++-- python/compute_error_rotation_2d.py | 76 ++-- 4 files changed, 400 insertions(+), 381 deletions(-) mode change 100644 => 100755 python/compute_error_rotation_2d.py diff --git a/.gitignore b/.gitignore index 6bfc794..4750550 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,9 @@ examples/*.m~ # temporary files for jupyter/ipython notebooks docs/.ipynb_checkpoints/ + + +# Python files +__pycache__/ +*.py[cod] +*$py.class diff --git a/python/InductionEq.py b/python/InductionEq.py index f471f38..0af9d02 100644 --- a/python/InductionEq.py +++ b/python/InductionEq.py @@ -1,18 +1,19 @@ import math import numpy as np -#import pyopencl as cl import cl +import sys +import time # from +InductionEq/preparse_vars.m def preparse_vars(): - global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps, I_Results - I_Mesh = {'NODES_X':0, 'NODES_Y':0, 'NODES_Z':0, 'DX':0, 'DY':0, 'DZ':0, 'XMIN':0, 'XMAX':0, 'YMAX':0, 'YMIN':0, 'ZMIN':0, 'ZMAX':0} - I_TI = {'cfl':0, 'final_time':0, 'time_integrator':'\0', 'DT':0, 'num_steps':0} - I_IEq = {'form_uibj':0, 'form_source':0, 'form_ujbi':0, 'dissipation':'NONE', 'dissipation_form':'\0', 'HO_DISSIPATION_FACTOR':1, 'hall_term':'NONE', 'g_range':0, 'l_range':0, 'MP2MIN':0, 'MP2MAX':0, 'CMIN':0, 'CMAX':0} - I_DC = {'use':0, 'form':'\0', 'BNODES':0, 'error_threshold':0, 'g_range':0, 'l_range':0,'max_iterations':0} - I_Tech = {'device':0, 'REAL':'\0','num_nodes_pad':0, 'num_groups':0,'W_SIZE':0, 'g_range':0, 'l_range':0, 'optimizations':'\0', 'context' : None} - I_RunOps = {'order':0, 'operator_form':'classical', 'testcase':'\0', 'variable_u':False, 'periodic':'NONE', 'plot_energy':0, 'plot_numerical_solution':0,'plot_analytical_solution':0, 'plot_difference':0, 'plot_divergence':0, 'save_fields':False, 'save_integrals_over_time':False} - I_Results = {'abs_err':0, 'rel_err':0, 'field_b':0, 'field_b_ana':0, 'energy':0, 'divergence_norm':0, 'runtime':0, 'kernel_runtime':0, 'energy_over_time':0, 'L2error_B_over_time':0, 'L2error_divB_over_time':0, 'time':0 } + global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps, I_Results + I_Mesh = {'NODES_X':0, 'NODES_Y':0, 'NODES_Z':0, 'DX':0, 'DY':0, 'DZ':0, 'XMIN':0, 'XMAX':0, 'YMAX':0, 'YMIN':0, 'ZMIN':0, 'ZMAX':0} + I_TI = {'cfl':0, 'final_time':0, 'time_integrator':'\0', 'DT':0, 'num_steps':0} + I_IEq = {'form_uibj':0, 'form_source':0, 'form_ujbi':0, 'dissipation':'NONE', 'dissipation_form':'\0', 'HO_DISSIPATION_FACTOR':1, 'hall_term':'NONE', 'g_range':0, 'l_range':0, 'MP2MIN':0, 'MP2MAX':0, 'CMIN':0, 'CMAX':0} + I_DC = {'use':0, 'form':'\0', 'BNODES':0, 'error_threshold':0, 'g_range':0, 'l_range':0,'max_iterations':0} + I_Tech = {'device':0, 'REAL':'\0','num_nodes_pad':0, 'num_groups':0,'W_SIZE':0, 'g_range':0, 'l_range':0, 'optimizations':'\0', 'context' : None} + I_RunOps = {'order':0, 'operator_form':'classical', 'testcase':'\0', 'variable_u':False, 'periodic':'NONE', 'plot_energy':0, 'plot_numerical_solution':0,'plot_analytical_solution':0, 'plot_difference':0, 'plot_divergence':0, 'save_fields':False, 'save_integrals_over_time':False} + I_Results = {'abs_err':0, 'rel_err':0, 'field_b':0, 'field_b_ana':0, 'energy':0, 'divergence_norm':0, 'runtime':0, 'kernel_runtime':0, 'energy_over_time':0, 'L2error_B_over_time':0, 'L2error_divB_over_time':0, 'time':0 } # from generate_settings.m @@ -21,164 +22,160 @@ def generate_settings(map, keys): # compiler optimizations and OpenCL defines of map with the given keys where # keys is a cell array, e.g. settings_mesh = generate_settings(I_Mesh, {'DX'; 'DY'; 'DZ'} - settings = ''; - for key in keys: - if key in map: - if not(type(map[key]) is float or type(map[key]) is int): - if key== 'optimizations': - settings = settings + map[key] - elif 'USE' in map[key]: - settings = settings + ' -D' + map[key] + '=1' - else: - settings = settings + ' -D' + key + '=' + str(map[key]) - else: - settings = settings + ' -D' + key + "=" + str(map[key]) - else: - print('Unknown identifier ' + keys[i] + '\n') - return settings + settings = ''; + for key in keys: + if key in map: + if not(type(map[key]) is float or type(map[key]) is int): + if key== 'optimizations': + settings = settings + map[key] + elif 'USE' in map[key]: + settings = settings + ' -D' + map[key] + '=1' + else: + settings = settings + ' -D' + key + '=' + str(map[key]) + else: + settings = settings + ' -D' + key + "=" + str(map[key]) + else: + print('Unknown identifier ' + keys[i]) + return settings # from +InductionEq/initialize.m def initialize(): - global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps + global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps # *NEW* generate opencl context with PyOpenCl - cl.create_ctx() - + cl.create_ctx() + I_Tech['device'] = cl.devices[0] # a python float is a 64bit float => float means double - if I_RunOps['periodic'] == 'USE_PERIODIC': - DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / float(I_Mesh['NODES_X']); - DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / float(I_Mesh['NODES_Y']); - DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / float(I_Mesh['NODES_Z']); - else: - DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / (float(I_Mesh['NODES_X'])-1); - DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / (float(I_Mesh['NODES_Y'])-1); - DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / (float(I_Mesh['NODES_Z'])-1); + if I_RunOps['periodic'] == 'USE_PERIODIC': + DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / float(I_Mesh['NODES_X']) + DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / float(I_Mesh['NODES_Y']) + DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / float(I_Mesh['NODES_Z']) + else: + DX = float(I_Mesh['XMAX'] - I_Mesh['XMIN']) / (float(I_Mesh['NODES_X'])-1) + DY = float(I_Mesh['YMAX'] - I_Mesh['YMIN']) / (float(I_Mesh['NODES_Y'])-1) + DZ = float(I_Mesh['ZMAX'] - I_Mesh['ZMIN']) / (float(I_Mesh['NODES_Z'])-1) + + I_Mesh['DX'] = DX; I_Mesh['DY'] = DY; I_Mesh['DZ'] = DZ + num_nodes = I_Mesh['NODES_X']*I_Mesh['NODES_Y']*I_Mesh['NODES_Z'] - I_Mesh['DX'] = DX; I_Mesh['DY'] = DY; I_Mesh['DZ'] = DZ; - num_nodes = I_Mesh['NODES_X']*I_Mesh['NODES_Y']*I_Mesh['NODES_Z']; -# later ... # type = dev_type(I_Tech('device')); # if (strcmp(type{1},'CPU')) # group_size = 16; # else -# if (num_nodes > lw_size(I_Tech('device'))) -# group_size = lw_size(I_Tech('device')); -# else -# group_size = 2^floor(log(num_nodes) / log(2)); -# end -# end -# FOR MY GPU - group_size = 1024 + if (num_nodes > cl.group_size): + group_size = cl.group_size + else: + group_size = 2^floor(log(num_nodes) / log(2)) - group_size = float(group_size); - num_nodes_pad = math.ceil(float(num_nodes)/group_size)*group_size - num_groups = math.ceil(num_nodes_pad/group_size) + group_size = float(group_size); + num_nodes_pad = math.ceil(float(num_nodes)/group_size)*group_size + num_groups = math.ceil(num_nodes_pad/group_size) - I_Tech['num_nodes_pad'] = int(num_nodes_pad) + I_Tech['num_nodes_pad'] = int(num_nodes_pad) - I_Tech['num_groups'] = int(num_groups); - I_Tech['W_SIZE'] = int(group_size); + I_Tech['num_groups'] = int(num_groups) + I_Tech['W_SIZE'] = int(group_size) # Initialise fields: magnetic, veclocity, and charge density - if I_Tech['REAL'] == 'float': - field_u_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) - field_b_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) - field_rho_init = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32)+1 - else: - field_u_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64) - field_b_init = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64) - field_rho_init = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64)+1 - + if I_Tech['REAL'] == 'float': + field_u_init = np.zeros((I_Tech['num_nodes_pad'], 4), dtype=np.float32) + field_b_init = np.zeros((I_Tech['num_nodes_pad'], 4), dtype=np.float32) + field_rho_init = np.ones((I_Tech['num_nodes_pad'], 1), dtype=np.float32) + else: + field_u_init = np.zeros((I_Tech['num_nodes_pad'], 4), dtype = np.float64) + field_b_init = np.zeros((I_Tech['num_nodes_pad'], 4), dtype = np.float64) + field_rho_init = np.ones((I_Tech['num_nodes_pad'], 1), dtype = np.float64) # Generate settings needed for computation of the initial state for the # specified testcase. - settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'optimizations']); - settings_mesh = generate_settings(I_Mesh, ['DX', 'DY', 'DZ', 'NODES_X', 'NODES_Y', 'NODES_Z','XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX']) - settings = settings_tech + settings_mesh + settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'optimizations']); + settings_mesh = generate_settings(I_Mesh, ['DX', 'DY', 'DZ', 'NODES_X', 'NODES_Y', 'NODES_Z','XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX']) + settings = settings_tech + settings_mesh - kernel_path_list = [] - kernel_path_list.append('../include/'+I_RunOps['testcase']+'.h') - kernel_path_list.append('../include/utils.h') - kernel_path_list.append('../kernel/kernel_init.cl') + kernel_path_list = [] + kernel_path_list.append('../include/'+I_RunOps['testcase']+'.h') + kernel_path_list.append('../include/utils.h') + kernel_path_list.append('../kernel/kernel_init.cl') - cl.compile_kernels(kernel_path_list, settings) + cl.compile_kernels(kernel_path_list, settings) # Set global and local range for calculation of the inital state # The global and local range can either be a vector of type uint32 or # double. Double is supported due to the fact that by default Matlab # stores all variables as double. - I_IEq['g_range'] = np.array([I_Mesh['NODES_X'], I_Mesh ['NODES_Y'], I_Mesh['NODES_Z']], dtype=np.int32) - I_IEq['l_range'] = np.array([0], dtype=np.int32) + I_IEq['g_range'] = np.array([I_Mesh['NODES_X'], I_Mesh ['NODES_Y'], I_Mesh['NODES_Z']], dtype=np.int32) + I_IEq['l_range'] = np.array([0], dtype=np.int32) # Compute initial state for magnetic field, velocity field and density # field - cl.run_kernel('init_b', I_IEq['g_range'], I_IEq['l_range'], field_b_init) - cl.run_kernel('init_u', I_IEq['g_range'], I_IEq['l_range'], field_u_init) - cl.run_kernel('init_rho', I_IEq['g_range'], I_IEq['l_range'], field_rho_init) + cl.run_kernel('init_b', I_IEq['g_range'], I_IEq['l_range'], field_b_init) + cl.run_kernel('init_u', I_IEq['g_range'], I_IEq['l_range'], field_u_init) + cl.run_kernel('init_rho', I_IEq['g_range'], I_IEq['l_range'], field_rho_init) + # Compute time step from CFL number, min step size and maximal velocity - u_max = max(np.sqrt(np.power(field_u_init[1,:], 2) + np.power(field_u_init[2,:],2) + np.power(field_u_init[3,:],2))) - dt = I_TI['cfl'] * min([DX, DY, DZ]) / float(u_max) - num_steps = math.ceil(I_TI['final_time']/dt) - dt = I_TI['final_time'] / num_steps + u_max = max(np.sqrt(np.power(field_u_init[:, 0], 2) + np.power(field_u_init[:, 1],2) + np.power(field_u_init[:, 2],2))) + dt = I_TI['cfl'] * min([DX, DY, DZ]) / float(u_max) + num_steps = math.ceil(I_TI['final_time']/dt) + dt = I_TI['final_time'] / num_steps - I_TI['DT'] = dt - I_TI['num_steps'] = num_steps + I_TI['DT'] = dt + I_TI['num_steps'] = num_steps # Global and local range for dot product and norm - I_Tech['g_range'] = np.array([I_Tech['num_nodes_pad'], 1, 1], dtype=np.uint32) - I_Tech['l_range'] = np.array([I_Tech['W_SIZE'], 1, 1], dtype=np.uint32) + I_Tech['g_range'] = np.array([I_Tech['num_nodes_pad'], 1, 1], dtype=np.uint32) + I_Tech['l_range'] = np.array([I_Tech['W_SIZE'], 1, 1], dtype=np.uint32) # Depending on the discretization of the laplace operator for # divergence cleaning set the number of boundary nodes - if I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_LNS': - I_DC['BNODES'] = 0; - elif (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_DIRICHLET') or (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_NARROW_STENCIL_DIRICHLET'): - I_DC['BNODES'] = 1; - else: - print('ERROR: Option I_DC(''divergence_cleaning_form'') = '+str(I_DC['divergence_cleaning_form']) + ' unknown.') + if I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_LNS': + I_DC['BNODES'] = 0 + elif (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_WIDE_STENCIL_DIRICHLET') or (I_DC['divergence_cleaning_form'] == 'USE_LAPLACE_NARROW_STENCIL_DIRICHLET'): + I_DC['BNODES'] = 1 + else: + print('ERROR: Option I_DC(''divergence_cleaning_form'') = '+str(I_DC['divergence_cleaning_form']) + ' unknown.') # Set global and local range for divergence cleaning - I_DC['g_range'] = np.array([I_Mesh['NODES_X']-2*I_DC['BNODES'], I_Mesh['NODES_Y']-2*I_DC['BNODES'], I_Mesh['NODES_Z']-2*I_DC['BNODES']], dtype=np.int32) - I_DC['l_range'] = np.array([0], dtype=np.int32) + I_DC['g_range'] = np.array([I_Mesh['NODES_X']-2*I_DC['BNODES'], I_Mesh['NODES_Y']-2*I_DC['BNODES'], I_Mesh['NODES_Z']-2*I_DC['BNODES']], dtype=np.int32) + I_DC['l_range'] = np.array([0], dtype=np.int32) - return field_b_init, field_u_init, field_rho_init + return field_b_init, field_u_init, field_rho_init def compute_numerical_solution_setup(field_b, field_u, field_rho): - global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps - - # Memory allocation for fields - if I_Tech['REAL'] == 'float': - # Initialize auxillary fields needed for time stepping - field_b2 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) - field_b3 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) - field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32) - field_curlB_rho = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) - current_time = np.zeros(2, dtype=np.float32) # contains two elements: time at the beginning of a step and of a stage - norm2_output = np.zeros((1, I_Tech['num_groups']), dtype=np.float32) - # Allocate memory for field used for divergence cleaning - if I_DC['divergence_cleaning']: + global I_Mesh, I_TI, I_IEq, I_DC, I_Tech, I_RunOps + + # Memory allocation for fields + if I_Tech['REAL'] == 'float': + # Initialize auxillary fields needed for time stepping + field_b2 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) + field_b3 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) + field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32) + field_curlB_rho = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float32) + current_time = np.zeros(2, dtype=np.float32) # contains two elements: time at the beginning of a step and of a stage + norm2_output = np.zeros((1, I_Tech['num_groups']), dtype=np.float32) + # Allocate memory for field used for divergence cleaning + if I_DC['divergence_cleaning']: #Field for divergence of b-field and phi - field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32) - field_phi = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32) + field_divB = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32) + field_phi = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32) #Fields needed for CG - field_laplace = np.zeros((1, I_Tech['num_nodes_pad']) , dtype=np.float32); - field_r = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32); - field_u_div_cleaning = np.zeros((1, I_Tech['num_nodes_pad']), dtype = np.float32); + field_laplace = np.zeros((1, I_Tech['num_nodes_pad']) , dtype=np.float32) + field_r = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float32) + field_u_div_cleaning = np.zeros((1, I_Tech['num_nodes_pad']), dtype = np.float32) - energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32); - L2error_B = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32) - L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32); - else: + energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32) + L2error_B = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32) + L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float32) + else: # Initialize auxillary fields needed for time stepping field_b2 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64) field_b3 = np.zeros((4, I_Tech['num_nodes_pad']), dtype=np.float64) @@ -196,240 +193,244 @@ def compute_numerical_solution_setup(field_b, field_u, field_rho): field_r = np.zeros((1, I_Tech['num_nodes_pad']), dtype=np.float64); field_u_div_cleaning = np.zeros((1, I_Tech['num_nodes_pad']), dtype =np.float64) - energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64); + energy = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64) L2error_B = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64) - L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64); + L2error_divB = np.zeros((1, I_TI['num_steps']+1), dtype=np.float64) - kernel_path_list = [] + kernel_path_list = [] # Include header file containing the coefficients of the respective order - if I_RunOps['order'] == 2: - if I_RunOps['operator_form'] == 'extended': - kernel_path_list.append('../include/2ndOrderExtended.h') - else: # classical operators - kernel_path_list.append('../include/2ndOrder.h') - - elif I_RunOps['order'] == 4: - if I_RunOps['operator_form'] == 'extended': - kernel_path_list.append('../include/4thOrderExtended.h') - else: # classical operators - kernel_path_list.append('../include/4thOrder.h') - - elif I_RunOps['order'] == 6: - if I_RunOps['operator_form'] == 'extended': - kernel_path_list.append('../include/6thOrderExtended.h') - else: # classical operators - kernel_path_list.append('../include/6thOrder.h') - - else: - print('Specify order \n') - - kernel_path_list.append('../include/utils.h') - kernel_path_list.append('../include/' + I_RunOps['testcase'] + '.h') - kernel_path_list.append('../kernel/kernel_init.cl') - - kernel_path_list.append('../kernel/SBP_operator.cl') - - kernel_path_list.append('../kernel/kernel_operator.cl') - - # Include files for divergence cleaning if desired - if I_DC['divergence_cleaning']: - pass + if I_RunOps['order'] == 2: + if I_RunOps['operator_form'] == 'extended': + kernel_path_list.append('../include/2ndOrderExtended.h') + else: # classical operators + kernel_path_list.append('../include/2ndOrder.h') + + elif I_RunOps['order'] == 4: + if I_RunOps['operator_form'] == 'extended': + kernel_path_list.append('../include/4thOrderExtended.h') + else: # classical operators + kernel_path_list.append('../include/4thOrder.h') + + elif I_RunOps['order'] == 6: + if I_RunOps['operator_form'] == 'extended': + kernel_path_list.append('../include/6thOrderExtended.h') + else: # classical operators + kernel_path_list.append('../include/6thOrder.h') + + else: + print('Specify order \n') + + kernel_path_list.append('../include/utils.h') + kernel_path_list.append('../include/' + I_RunOps['testcase'] + '.h') + kernel_path_list.append('../kernel/kernel_init.cl') + + kernel_path_list.append('../kernel/SBP_operator.cl') + + kernel_path_list.append('../kernel/kernel_operator.cl') + + # Include files for divergence cleaning if desired + if I_DC['divergence_cleaning']: + pass #Prepare temporary fields for divergence cleaning -# DC_fields = struct('divB',field_divB,'phi',field_phi,'laplace', field_laplace,'r', field_r,'u', field_u_div_cleaning,'output', norm2_output); +# DC_fields = struct('divB',field_divB,'phi',field_phi,'laplace', field_laplace,'r', field_r,'u', field_u_div_cleaning,'output', norm2_output); # kernel_path_list = [kernel_path_list, {'../kernel/kernel_div_cleaning.cl'}]; # kernel_path_list = [kernel_path_list, {'../kernel/kernel_CG.cl'}]; # settings_dc = generate_settings(I_DC, {'BNODES', 'divergence_cleaning_form'}); - else: - DC_fields = 0 - settings_dc ='' + else: + DC_fields = 0 + settings_dc ='' # Include files for artificial dissipation if desired - if I_IEq['dissipation'] == 'USE_ARTIFICIAL_DISSIPATION': - pass -# kernel_path_list = [kernel_path_list, {'../include/artificial_dissipation.h'}]; -# kernel_path_list = [kernel_path_list, {'../kernel/artificial_dissipation.cl'}]; -# settings_dissipation = generate_settings(I_IEq, {'dissipation', 'dissipation_form', 'HO_DISSIPATION_FACTOR'}); - - if I_IEq['dissipation_form'] == 'USE_ADAPTIVE_DISSIPATION': - pass -# settings_dissipation = strcat(settings_dissipation, generate_settings(I_IEq, {'MP2MIN'; 'MP2MAX'; 'CMIN'; 'CMAX'})); - else: - settings_dissipation = '' - - kernel_path_list.append('../kernel/induction_Eq_volume.cl') - kernel_path_list.append('../kernel/induction_Eq_surface.cl') - kernel_path_list.append('../kernel/kernel_dot_product.cl') - kernel_path_list.append('../kernel/kernel_time_integrator.cl') + if I_IEq['dissipation'] == 'USE_ARTIFICIAL_DISSIPATION': + pass +# kernel_path_list = [kernel_path_list, {'../include/artificial_dissipation.h'}]; +# kernel_path_list = [kernel_path_list, {'../kernel/artificial_dissipation.cl'}]; +# settings_dissipation = generate_settings(I_IEq, {'dissipation', 'dissipation_form', 'HO_DISSIPATION_FACTOR'}); + + if I_IEq['dissipation_form'] == 'USE_ADAPTIVE_DISSIPATION': + pass +# settings_dissipation = strcat(settings_dissipation, generate_settings(I_IEq, {'MP2MIN'; 'MP2MAX'; 'CMIN'; 'CMAX'})); + else: + settings_dissipation = '' + + kernel_path_list.append('../kernel/induction_Eq_volume.cl') + kernel_path_list.append('../kernel/induction_Eq_surface.cl') + kernel_path_list.append('../kernel/kernel_dot_product.cl') + kernel_path_list.append('../kernel/kernel_time_integrator.cl') # Specify compile settings - settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'W_SIZE', 'optimizations']) + settings_tech = generate_settings(I_Tech, ['REAL', 'REAL4', 'W_SIZE', 'optimizations']) - settings_mesh = generate_settings(I_Mesh, ['DX', 'DY', 'DZ','NODES_X', 'NODES_Y', 'NODES_Z', 'XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX']) + settings_mesh = generate_settings(I_Mesh, ['DX', 'DY', 'DZ','NODES_X', 'NODES_Y', 'NODES_Z', 'XMIN', 'XMAX', 'YMIN', 'YMAX','ZMIN', 'ZMAX']) - settings_induction_eq = generate_settings(I_IEq, ['form_uibj', 'form_source', 'form_ujbi', 'hall_term']) + settings_induction_eq = generate_settings(I_IEq, ['form_uibj', 'form_source', 'form_ujbi', 'hall_term']) - settings_time_integration = generate_settings(I_TI, ['DT']) + settings_time_integration = generate_settings(I_TI, ['DT']) - settings_runops = generate_settings(I_RunOps, ['periodic']) + settings_runops = generate_settings(I_RunOps, ['periodic']) - settings = settings_induction_eq + settings_time_integration + settings_tech + settings_mesh + settings_dissipation + settings_dc + settings_runops + settings = settings_induction_eq + settings_time_integration + settings_tech + settings_mesh + settings_dissipation + settings_dc + settings_runops - if I_TI['time_integrator'] == 'SSPRK33': + if I_TI['time_integrator'] == 'SSPRK33': # 'SSPRK33_1', 'SSPRK33_2a', 'SSPRK33_2b', 'SSPRK33_3a', 'SSPRK33_3b'}; # calc_curlB_rho = {'calc_curlB_rho_1_3args', 'calc_curlB_rho_2_3args', 'calc_curlB_rho_3_3args'}; - RK_Step = []; - if I_IEq['hall_term'] == 'USE_HALL': - RK_Step.append('calc_curlB_rho_1_3args') - if I_RunOps['variable_u']: - RK_Step.append('calc_u_3_args') - RK_Step.append('SSPRK33_1') - - RK_Step.append('SSPRK33_2a') - if I_IEq['hall_term']== 'USE_HALL': - RK_Step.append('calc_curlB_rho_2_3args') - if I_RunOps['variable_u']: - RK_Step.append('calc_u_3_args') - RK_Step.append('SSPRK33_2b') - - RK_Step.append('SSPRK33_3a') - if I_IEq['hall_term'] == 'USE_HALL': - RK_Step.append('calc_curlB_rho_3_3args') - if I_RunOps['variable_u']: - RK_Step.append('calc_u_3_args') - RK_Step.append('SSPRK33_3b') - - RK_Step.append('calc_time_3_args') - - time_integrator_num_fields = 3; + RK_Step = []; + if I_IEq['hall_term'] == 'USE_HALL': + RK_Step.append('calc_curlB_rho_1_3args') + if I_RunOps['variable_u']: + RK_Step.append('calc_u_3_args') + RK_Step.append('SSPRK33_1') + + RK_Step.append('SSPRK33_2a') + if I_IEq['hall_term']== 'USE_HALL': + RK_Step.append('calc_curlB_rho_2_3args') + if I_RunOps['variable_u']: + RK_Step.append('calc_u_3_args') + RK_Step.append('SSPRK33_2b') + + RK_Step.append('SSPRK33_3a') + if I_IEq['hall_term'] == 'USE_HALL': + RK_Step.append('calc_curlB_rho_3_3args') + if I_RunOps['variable_u']: + RK_Step.append('calc_u_3_args') + RK_Step.append('SSPRK33_3b') + + RK_Step.append('calc_time_3_args') + + time_integrator_num_fields = 3 # switch time_integrator_num_fields - if time_integrator_num_fields == 2: - IEq_fields = {'field_b':field_b, \ - 'field_b2': field_b2, \ - 'field_curlB_rho': field_curlB_rho, \ - 'field_u': field_u, \ - 'field_rho':field_rho, \ - 'current_time':current_time, \ - 'field_divB': field_divB, \ - 'norm2_output':norm2_output, \ - 'energy': energy, \ - 'L2error_B': L2error_B, \ - 'L2error_divB': L2error_divB} - elif time_integrator_num_fields == 3: - IEq_fields = {'field_b': field_b, \ - 'field_b2': field_b2, \ - 'field_b3': field_b3, \ - 'field_curlB_rho': field_curlB_rho, \ - 'field_u': field_u, \ - 'field_rho': field_rho, \ - 'current_time': current_time, \ - 'field_divB': field_divB, \ + if time_integrator_num_fields == 2: + IEq_fields = {'field_b':field_b, \ + 'field_b2': field_b2, \ + 'field_curlB_rho': field_curlB_rho, \ + 'field_u': field_u, \ + 'field_rho':field_rho, \ + 'current_time':current_time, \ + 'field_divB': field_divB, \ + 'norm2_output':norm2_output, \ + 'energy': energy, \ + 'L2error_B': L2error_B, \ + 'L2error_divB': L2error_divB} + elif time_integrator_num_fields == 3: + IEq_fields = {'field_b': field_b, \ + 'field_b2': field_b2, \ + 'field_b3': field_b3, \ + 'field_curlB_rho': field_curlB_rho, \ + 'field_u': field_u, \ + 'field_rho': field_rho, \ + 'current_time': current_time, \ + 'field_divB': field_divB, \ 'norm2_output': norm2_output,\ 'energy': energy,\ 'L2error_B': L2error_B, \ 'L2error_divB': L2error_divB} - else: - print('Wrong value of time_integrator_num_fields =' + str( time_integrator_num_fields)) + else: + raise Exception('Wrong value of time_integrator_num_fields =' + str( time_integrator_num_fields)) - return kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step + return kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step def compute_numerical_solution(field_b, field_u, field_rho): - global I_mesh, I_TI, I_IEq, I_Tech, I_RunOps, I_Results + global I_mesh, I_TI, I_IEq, I_Tech, I_RunOps, I_Results # setup fields, settings etc. - (kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step) = compute_numerical_solution_setup(field_b, field_u, field_rho) + (kernel_path_list, settings, IEq_fields, time_integrator_num_fields, DC_fields, RK_Step) = compute_numerical_solution_setup(field_b, field_u, field_rho) # Initialise logging variables - kernel_runtime = 0; - RK_block_size = 15000; - if I_RunOps['save_integrals_over_time']: - energy = IEq_fields['energy'] - L2error_B = IEq_fields['L2error_B'] - L2error_divB = IEq_fields['L2error_divB'] + kernel_runtime = 0; + RK_block_size = 15000; + if I_RunOps['save_integrals_over_time']: + energy = IEq_fields['energy'] + L2error_B = IEq_fields['L2error_B'] + L2error_divB = IEq_fields['L2error_divB'] - norm2_output = IEq_fields['norm2_output'] + norm2_output = IEq_fields['norm2_output'] # Compile kernel - cl.compile_kernels(kernel_path_list, settings); + cl.compile_kernels(kernel_path_list, settings) + + t1 = time.clock() # Switch case for time integrator - if time_integrator_num_fields == 2: - print("time_integrator_num_fields = 2 not ready yet \n") - elif time_integrator_num_fields == 3: - field_b = IEq_fields['field_b'] - field_b2 = IEq_fields['field_b2'] - field_b3 = IEq_fields['field_b3'] - field_curlB_rho = IEq_fields['field_curlB_rho'] - field_u = IEq_fields['field_u'] - field_rho = IEq_fields['field_rho'] - current_time = IEq_fields['current_time'] - field_divB = IEq_fields['field_divB'] - - if False: #other versions - pass - else: - num_steps_run = I_TI['num_steps']; - while num_steps_run > RK_block_size: - kernel_list = RK_Step * RK_block_size - t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\ - field_b, field_b2, field_b3, field_curlB_rho,\ - field_u, field_rho, current_time) - kernel_runtime = kernel_runtime + t; - num_steps_run = num_steps_run - RK_block_size; - - if num_steps_run > 0: - kernel_list = RK_Step *num_steps_run - t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\ - field_b, field_b2, field_b3, field_curlB_rho,\ - field_u, field_rho, current_time); - kernel_runtime = kernel_runtime + t; - - else: - print('Wrong value of time_integrator_num_fields = ' + str(time_integrator_num_fields) + ' \n') - #runtime = toc; - runtime = 0 - print('Elapsed time is' + str(runtime) + 'seconds.\n') - - # Save total runtime and kernel runtime - I_Results['runtime'] = runtime; - I_Results['kernel_runtime'] = kernel_runtime; - - # save fields - if I_RunOps['save_fields']: - I_Results['field_b'] = field_b; - - field_b_ana = IEq_fields['field_b2'] - - #Calculate analytical solution and error - current_time[1] = I_TI['final_time'] - cl.run_kernel('analytical_b', I_IEq['g_range'], I_IEq['l_range'], field_b_ana, current_time); - - norm2_output[:] = 0; - - cl.run_kernel('norm2_diff', I_Tech['g_range'], I_Tech['l_range'], field_b, field_b_ana, norm2_output); - I_Results['abs_err'] = math.sqrt(sum(sum(norm2_output))); - - - norm2_output[:] = 0; - cl.run_kernel( 'norm2', I_Tech['g_range'], I_Tech['l_range'], field_b_ana, norm2_output); - I_Results['rel_err'] = I_Results['abs_err'] / math.sqrt(norm2_output.sum()); - - - #Calculate divergence - cl.run_kernel('calc_div', I_IEq['g_range'], I_IEq['l_range'], field_b, field_divB); - norm2_output[:] = 0; - cl.run_kernel('norm2_S', I_Tech['g_range'], I_Tech['l_range'], field_divB, norm2_output); - I_Results['divergence_norm'] = math.sqrt(norm2_output.sum()); - - - #Calculate energy - norm2_output[:] = 0; - cl.run_kernel('norm2', I_Tech['g_range'], I_Tech['l_range'], field_b, norm2_output); - I_Results['energy'] = norm2_output.sum(); + if time_integrator_num_fields == 2: + print("time_integrator_num_fields = 2 not ready yet \n") + elif time_integrator_num_fields == 3: + field_b = IEq_fields['field_b'] + field_b2 = IEq_fields['field_b2'] + field_b3 = IEq_fields['field_b3'] + field_curlB_rho = IEq_fields['field_curlB_rho'] + field_u = IEq_fields['field_u'] + field_rho = IEq_fields['field_rho'] + current_time = IEq_fields['current_time'] + field_divB = IEq_fields['field_divB'] + + if False: #other versions + pass + else: + num_steps_run = I_TI['num_steps']; + while num_steps_run > RK_block_size: + kernel_list = RK_Step * RK_block_size + t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\ + field_b, field_b2, field_b3, field_curlB_rho,\ + field_u, field_rho, current_time) + kernel_runtime = kernel_runtime + t + num_steps_run = num_steps_run - RK_block_size + + if num_steps_run > 0: + kernel_list = RK_Step *num_steps_run + t = cl.run_kernel(kernel_list, I_IEq['g_range'], I_IEq['l_range'],\ + field_b, field_b2, field_b3, field_curlB_rho,\ + field_u, field_rho, current_time) + kernel_runtime = kernel_runtime + t + + else: + raise Exception('Wrong value of time_integrator_num_fields = ' + str(time_integrator_num_fields) + ' \n') + runtime = time.clock() - t1 + print('Elapsed time is ' + str(runtime) + ' seconds.\n') + + # Save total runtime and kernel runtime + I_Results['runtime'] = runtime + I_Results['kernel_runtime'] = kernel_runtime + + # save fields + if I_RunOps['save_fields']: + I_Results['field_b'] = field_b + + field_b_ana = IEq_fields['field_b2'] + + #Calculate analytical solution and error + current_time[1] = I_TI['final_time'] + cl.run_kernel('analytical_b', I_IEq['g_range'], I_IEq['l_range'], field_b_ana, current_time) + + norm2_output[:] = 0 + + cl.run_kernel('norm2_diff', I_Tech['g_range'], I_Tech['l_range'], field_b, field_b_ana, norm2_output) + I_Results['abs_err'] = math.sqrt(sum(sum(norm2_output))) + + + norm2_output[:] = 0 + cl.run_kernel( 'norm2', I_Tech['g_range'], I_Tech['l_range'], field_b_ana, norm2_output) + I_Results['rel_err'] = I_Results['abs_err'] / math.sqrt(norm2_output.sum()) + + + #Calculate divergence + cl.run_kernel('calc_div', I_IEq['g_range'], I_IEq['l_range'], field_b, field_divB) + norm2_output[:] = 0 + cl.run_kernel('norm2_S', I_Tech['g_range'], I_Tech['l_range'], field_divB, norm2_output) + I_Results['divergence_norm'] = math.sqrt(norm2_output.sum()) + + + #Calculate energy + norm2_output[:] = 0; + cl.run_kernel('norm2', I_Tech['g_range'], I_Tech['l_range'], field_b, norm2_output) + I_Results['energy'] = norm2_output.sum() + +if sys.version_info[0] < 3: + raise Exception("Must be using Python 3") diff --git a/python/cl.py b/python/cl.py index cc177f5..96de6c4 100644 --- a/python/cl.py +++ b/python/cl.py @@ -1,56 +1,62 @@ import pyopencl as cl def compile_kernels(src_path, cmd_line, *args): - global program, ctx - src = ' ' - for entry in src_path: - file = open(entry, 'r') - src = src + file.read() - file.close() - prg = cl.Program(ctx, src).build(options=cmd_line) - program = prg - return prg + global program, ctx + src = ' ' + for entry in src_path: + with open(entry, 'r') as io: + src = src + io.read() + prg = cl.Program(ctx, src).build(options=cmd_line) + program = prg + return prg def create_ctx(): - global ctx, queue, PYOPENCL_COMPILER_OUTPUT - PYOPENCL_COMPILER_OUTPUT=1 - ctx = cl.create_some_context() - queue = cl.CommandQueue(ctx) - return ctx, queue + global ctx, queue, PYOPENCL_COMPILER_OUTPUT, devices, group_size + PYOPENCL_COMPILER_OUTPUT=1 + ctx = cl.create_some_context() + queue = cl.CommandQueue(ctx) + devices = ctx.devices + group_size = devices[0].max_work_group_size + return ctx, queue def run_kernel(names, g_range, l_range, *arguments): - global queue, program - inputbuffers = [] - for arg in arguments: - inputbuffers.append(cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = arg)) - callstrings = [] - if type(names) is list: - for name in names: - callstrings.append('program.' + name + '(queue, tuple(g_range) , None, *inputbuffers)') - else: - callstrings.append('program.' + names + '(queue, tuple(g_range), None, *inputbuffers)') - - for callstring in callstrings: - event = eval(callstring) -# event.wait() - i = 0 - for arg in arguments: - cl.enqueue_copy(queue, arg, inputbuffers[i]) -# inputbuffers[i].release() - i = i +1 - return 0 + global queue, program + inputbuffers = [] + for arg in arguments: + inputbuffers.append(cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = arg)) + callstrings = [] + if l_range[0] == 0 and len(l_range) == 1: + l_range = None + else: + l_range = tuple(l_range) + + if type(names) is list: + for name in names: + callstrings.append('program.' + name + '(queue, tuple(g_range) , l_range, *inputbuffers)') + else: + callstrings.append('program.' + names + '(queue, tuple(g_range), l_range, *inputbuffers)') + + for callstring in callstrings: + event = eval(callstring) + event.wait() + i = 0 + for arg in arguments: + cl.enqueue_copy(queue, arg, inputbuffers[i]).wait() + inputbuffers[i].release() + i = i +1 + return 0 def run_kernel_ref(name, g_range, l_range, *arg_refs): - global queue, program - callstring = 'program.' + name + '(queue, None, None, *inputbuffers)' - eval(callstring) - return + global queue, program + callstring = 'program.' + name + '(queue, None, None, *inputbuffers)' + eval(callstring) + return def allocate_buf(array): - return cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = array) + return cl.Buffer(ctx, cl.mem_flags.COPY_HOST_PTR, hostbuf = array) def malloc(size): - return cl.Buffer(ctx, size=size) + return cl.Buffer(ctx, size=size) diff --git a/python/compute_error_rotation_2d.py b/python/compute_error_rotation_2d.py old mode 100644 new mode 100755 index e5cd9ed..67f7bb2 --- a/python/compute_error_rotation_2d.py +++ b/python/compute_error_rotation_2d.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 # -*- coding: -*- #This project is licensed under the terms of the Creative Commons CC BY-NC-ND 3.0 license. @@ -9,6 +10,7 @@ #enabled, e.g. divergence cleaning. import InductionEq as induction_eq from math import pi +import sys #prepare_vars() initializes containers (maps) in which all variables (label, value) are #stored. Variables are grouped together by purpose: #I_Mesh: Contains all variables concerning the discrete mesh, e.g. @@ -30,7 +32,11 @@ #or to switch between different discretizations. In case the key is written #in capital letters the program define will be set to the corresponding #value of the key. -induction_eq.preparse_vars(); +if sys.version_info[0] < 3: + raise Exception("Must be using Python 3") + + +induction_eq.preparse_vars() #global I_Mesh I_TI I_IEq I_DC I_Tech I_RunOps I_Results @@ -67,17 +73,17 @@ # Induction equation related variables #Specify how the three part of the linear induction equation shall be #computed. -I_IEq['form_uibj'] = 'USE_UIBJ_PRODUCT'; # PRODUCT, SPLIT, CENTRAL -I_IEq['form_source'] = 'USE_SOURCE_CENTRAL'; # CENTRAL, SPLIT, ZERO -I_IEq['form_ujbi'] = 'USE_UJBI_CENTRAL'; # SPLIT, CENTRAL, PRODUCT +I_IEq['form_uibj'] = 'USE_UIBJ_PRODUCT' # PRODUCT, SPLIT, CENTRAL +I_IEq['form_source'] = 'USE_SOURCE_CENTRAL' # CENTRAL, SPLIT, ZERO +I_IEq['form_ujbi'] = 'USE_UJBI_CENTRAL' # SPLIT, CENTRAL, PRODUCT #Enable or disable Hall-Term -I_IEq['hall_term'] = 'NONE'; # NONE, USE_HALL +I_IEq['hall_term'] = 'NONE' # NONE, USE_HALL #Enable or disable artificial dissipation -I_IEq['dissipation'] = 'NONE'; # NONE, USE_ARTIFICIAL_DISSIPATION +I_IEq['dissipation'] = 'NONE' # NONE, USE_ARTIFICIAL_DISSIPATION #Specify what kind of artifical dissipation shall be used #USE_ADAPTIVE_DISSIPATION, USE_FIRST_ORDER_DISSIPATION, USE_HIGH_ORDER_DISSIPATION -I_IEq['dissipation_form'] = 'USE_HIGH_ORDER_DISSIPATION'; -I_IEq['HO_DISSIPATION_FACTOR'] = 1; # a constant (non-negative) factor adapting the influence of the high order dissipation +I_IEq['dissipation_form'] = 'USE_HIGH_ORDER_DISSIPATION' +I_IEq['HO_DISSIPATION_FACTOR'] = 1 # a constant (non-negative) factor adapting the influence of the high order dissipation #Additional parameters needed for adaptive dissipation. For typical values #see Svärd et al. (2009). I_IEq['MP2MIN'] = 1 @@ -93,31 +99,31 @@ I_DC['divergence_cleaning_form'] = 'USE_LAPLACE_WIDE_STENCIL_LNS' #USE_LAPLACE_WIDE_STENCIL_LNS USE_LAPLACE_WIDE_STENCIL_DIRICHLET USE_LAPLACE_NARROW_STENCIL_DIRICHLET #Specify a threshold for the divergence norm -I_DC['absolute_error_threshold'] = 1e-3; +I_DC['absolute_error_threshold'] = 1e-3 #The divergence cleaner will exit after max_iterations even if the error #threshold is not reached -I_DC['max_iterations'] = 50; +I_DC['max_iterations'] = 50 # Technical details # Specify the OpenCL device that will be used for computation -I_Tech['device'] = 1; +I_Tech['device'] = 1 # Switch between single floating point and double floating point precision I_Tech['REAL'] = 'double' # float -I_Tech['REAL4'] = I_Tech['REAL'] + '4'; #Vector datatype +I_Tech['REAL4'] = I_Tech['REAL'] + '4' #Vector datatype #Compiler based optimizations if I_Tech['REAL'] == 'float': - I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only -cl-single-precision-constant'; + I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only -cl-single-precision-constant' else: - I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only'; + I_Tech['optimizations'] = ' -cl-mad-enable -cl-no-signed-zeros -cl-finite-math-only' # Options relevant for a run # Defines the order -I_RunOps['order'] = 4; # 2, 4, 6 -I_RunOps['operator_form'] = 'classical'; # 'classical' or 'extended' operators +I_RunOps['order'] = 4 # 2, 4, 6 +I_RunOps['operator_form'] = 'classical' # 'classical' or 'extended' operators # Specify the testcase. The name of the testcase has to be equal to the # name of a header file which contains a function describing the initial state. # Example testcases are: @@ -131,35 +137,35 @@ # 'x', 'y', 'z' # If you want to plot multiple cross sections use # 'xy', 'xz', 'yz', 'xyz' -I_RunOps['plot_numerical_solution'] = ''; -I_RunOps['plot_analytical_solution'] = ''; -I_RunOps['plot_difference'] = ''; -I_RunOps['plot_divergence'] = ''; +I_RunOps['plot_numerical_solution'] = '' +I_RunOps['plot_analytical_solution'] = '' +I_RunOps['plot_difference'] = '' +I_RunOps['plot_divergence'] = '' #If set to 1 the magnetic field will be saved to I_Results('field_b') -I_RunOps['save_fields'] = False; +I_RunOps['save_fields'] = False #If set to true the magnetic energy (L^2 norm) will be saved to I_Results('energy_over_time'), the #L2 errors (if available) to I_Results('L2error_B_over_time') & I_Results('L2error_divB_over_time') #and the time to I_Results('time') -I_RunOps['save_integrals_over_time'] = False; +I_RunOps['save_integrals_over_time'] = False #Initialize the magnetic field, the velocity field and the density field #according to the specified testcase. Also calculates and sets additional #variables, e.g. stepsize, timestep, local work group size, etc... -(field_b_init, field_u_init, field_rho_init) = induction_eq.initialize(); - -print('Testcase: ' + I_RunOps['testcase'] +'\n') -print('Order:' +str(I_RunOps['order'])+ '\n') -print('Time integrator:' + I_TI['time_integrator'] + ' \n') -print('DT: '+ str(I_TI['DT']) + ' N_STEPS: ' + str(I_TI['num_steps']) + ' FINAL_TIME: '+str(I_TI['final_time'])+'\n') -print('DX: '+ str(I_Mesh['DX']) + ' NODES_X: '+str(I_Mesh['NODES_X']) +'\n') -print('DY: '+ str(I_Mesh['DY']) + ' NODES_Y: '+str(I_Mesh['NODES_Y'])+'\n') -print('DZ: '+ str(I_Mesh['DZ']) + ' NODES_Z: '+str(I_Mesh['NODES_Z'])+'\n') -print('Dissipation: '+I_IEq['dissipation']+' \n') -print('Divergence cleaning: '+str(I_DC['divergence_cleaning'])+' \n') -print('REAL:'+ I_Tech['REAL']+'\n') +(field_b_init, field_u_init, field_rho_init) = induction_eq.initialize() + +print('Testcase: ' + I_RunOps['testcase']) +print('Order:' + str(I_RunOps['order'])) +print('Time integrator:' + I_TI['time_integrator']) +print('DT: '+ str(I_TI['DT']) + ' N_STEPS: ' + str(I_TI['num_steps']) + ' FINAL_TIME: '+str(I_TI['final_time'])) +print('DX: '+ str(I_Mesh['DX']) + ' NODES_X: '+str(I_Mesh['NODES_X'])) +print('DY: '+ str(I_Mesh['DY']) + ' NODES_Y: '+str(I_Mesh['NODES_Y'])) +print('DZ: '+ str(I_Mesh['DZ']) + ' NODES_Z: '+str(I_Mesh['NODES_Z'])) +print('Dissipation: '+I_IEq['dissipation']) +print('Divergence cleaning: '+str(I_DC['divergence_cleaning'])) +print('REAL:'+ I_Tech['REAL']) # Takes the initialized fields and advances the solution in time induction_eq.compute_numerical_solution(field_b_init, field_u_init, field_rho_init); -print('Divergence Norm: ' + str(I_Results['divergence_norm']) + ' Total Energy: '+str(I_Results['energy']) + '\n') +print('Divergence Norm: ' + str(I_Results['divergence_norm']) + ' Total Energy: '+str(I_Results['energy'])) print('Relative Error:' + str(100*I_Results['rel_err']) + '%\n\n')