From 5790e4d99fe208bae4817685e30a1f389a3a25a5 Mon Sep 17 00:00:00 2001 From: Leonardo Solis Date: Sun, 24 Feb 2019 22:32:17 +0100 Subject: [PATCH] #39, cleaned host --- .../host/src/performdocking.cpp | 1109 +++++++---------- 1 file changed, 427 insertions(+), 682 deletions(-) diff --git a/ofdock_taskpar_alt/host/src/performdocking.cpp b/ofdock_taskpar_alt/host/src/performdocking.cpp index 4ba2fc0..b992106 100644 --- a/ofdock_taskpar_alt/host/src/performdocking.cpp +++ b/ofdock_taskpar_alt/host/src/performdocking.cpp @@ -110,10 +110,6 @@ static cl_kernel kernel_igl_arbiter = NULL; static const char *name_krnl_igl_arbiter = "Krnl_IGL_Arbiter"; #endif - - - - static cl_program program = NULL; // Function prototypes @@ -125,10 +121,6 @@ static void device_info_bool ( cl_device_id device, cl_device_info param, const static void device_info_string( cl_device_id device, cl_device_info param, const char* name); static void display_device_info( cl_device_id device ); - - - - //// -------------------------------- //// Host constant struct //// -------------------------------- @@ -184,23 +176,6 @@ cl_mem mem_KerConstStatic_rotbonds_moving_vectors_const; cl_mem mem_KerConstStatic_rotbonds_unit_vectors_const; cl_mem mem_KerConstStatic_ref_orientation_quats_const; -/* // Nr elements // Nr bytes -cl_mem mem_atom_charges_const; // float [MAX_NUM_OF_ATOMS]; // 90 = 90 //360 -cl_mem mem_atom_types_const; // char [MAX_NUM_OF_ATOMS]; // 90 = 90 //360 -cl_mem mem_intraE_contributors_const; // char [3*MAX_INTRAE_CONTRIBUTORS]; // 3*8128=28384 //24384 -cl_mem mem_VWpars_AC_const; // float [MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES]; // 14*14 = 196 //784 -cl_mem mem_VWpars_BD_const; // float [MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES]; // 14*14 = 196 //784 -cl_mem mem_dspars_S_const; // float [MAX_NUM_OF_ATYPES]; // 14 = 14 //56 -cl_mem mem_dspars_V_const; // float [MAX_NUM_OF_ATYPES]; // 14 = 14 //56 -cl_mem mem_rotlist_const; // int [MAX_NUM_OF_ROTATIONS]; // 4096 = 4096 //16384 -cl_mem mem_ref_coords_x_const; // float [MAX_NUM_OF_ATOMS]; // 90 = 90 //360 -cl_mem mem_ref_coords_y_const; // float [MAX_NUM_OF_ATOMS]; // 90 = 90 //360 -cl_mem mem_ref_coords_z_const; // float [MAX_NUM_OF_ATOMS]; // 90 = 90 //360 -cl_mem mem_rotbonds_moving_vectors_const;// float [3*MAX_NUM_OF_ROTBONDS]; // 3*32 = 96 //384 -cl_mem mem_rotbonds_unit_vectors_const; // float [3*MAX_NUM_OF_ROTBONDS]; // 3*32 = 96 //384 -cl_mem mem_ref_orientation_quats_const; // float [4*MAX_NUM_OF_RUNS]; // 4*100 = 400 //1600 -*/ - cl_mem mem_dockpars_fgrids; #if defined(SEPARATE_FGRID_INTERE) @@ -210,9 +185,6 @@ cl_mem mem_dockpars_fgrids3; cl_mem mem_dockpars_conformations_current; cl_mem mem_dockpars_energies_current; -/* -cl_mem mem_dockpars_prng_states; -*/ #if defined(SINGLE_COPY_POP_ENE) cl_mem mem_evals_performed; @@ -221,49 +193,6 @@ cl_mem mem_gens_performed; cl_mem mem_evals_and_generations_performed; #endif - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//#if defined (FIXED_POINT_INTERE) -#if 0 -//#include "defines_fixedpt_64.h" -fixedpt64* cpu_fixedpt64grids; - -#endif - - - //// -------------------------------- //// Docking //// -------------------------------- @@ -301,13 +230,7 @@ filled with clock() */ printf("Init complete!\n"); fflush(stdout); Liganddata myligand_reference; - //Dockparameters dockpars; - -//#if defined (FIXED_POINT_INTERE) -#if 0 - size_t size_fixedpt64grids; -#else size_t size_floatgrids; #if defined(SEPARATE_FGRID_INTERE) @@ -315,45 +238,46 @@ filled with clock() */ size_t size_floatgrids3; #endif -#endif - size_t size_populations; size_t size_energies; size_t size_prng_seeds; -#if defined(SINGLE_COPY_POP_ENE) + + #if defined(SINGLE_COPY_POP_ENE) size_t size_evals_of_runs; -#endif + #endif clock_t clock_start_docking; clock_t clock_stop_docking; clock_t clock_stop_program_before_clustering; //allocating CPU memory for initial populations -#if defined(SINGLE_COPY_POP_ENE) + #if defined(SINGLE_COPY_POP_ENE) size_populations = mypars->num_of_runs * mypars->pop_size * ACTUAL_GENOTYPE_LENGTH * sizeof(float); -#else + #else //size_populations = mypars->pop_size * GENOTYPE_LENGTH_IN_GLOBMEM * sizeof(float); size_populations = mypars->pop_size * ACTUAL_GENOTYPE_LENGTH * sizeof(float); -#endif + #endif + cpu_init_populations = (float*) alignedMalloc(size_populations); memset(cpu_init_populations, 0, size_populations); //allocating CPU memory for results -#if defined(SINGLE_COPY_POP_ENE) + #if defined(SINGLE_COPY_POP_ENE) size_energies = mypars->num_of_runs * mypars->pop_size * sizeof(float); -#else + #else size_energies = mypars->pop_size * sizeof(float); -#endif + #endif + cpu_energies = (float*) alignedMalloc(size_energies); cpu_result_ligands = (Ligandresult*) alignedMalloc(sizeof(Ligandresult)*(mypars->num_of_runs)); cpu_final_populations = (float*) alignedMalloc(size_populations); //allocating memory in CPU for reference orientation angles -#if defined(SINGLE_COPY_POP_ENE) + #if defined(SINGLE_COPY_POP_ENE) cpu_ref_ori_angles = (float*) alignedMalloc(mypars->num_of_runs*3*sizeof(float)); -#else + #else cpu_ref_ori_angles = (float*) alignedMalloc(3*sizeof(float)); -#endif + #endif //generating initial populations and random orientation angles of reference ligand //(ligand will be moved to origo and scaled as well) @@ -368,16 +292,14 @@ filled with clock() */ genseed(time(NULL)); //initializing seed generator for (unsigned int i=0; inum_of_runs; i++) { -#if defined (REPRO) + #if defined (REPRO) cpu_prng_seeds[i] = 1u; -#else + #else cpu_prng_seeds[i] = genseed(0u); -#endif + #endif } - //srand(time(NULL)); - -#if defined(SINGLE_COPY_POP_ENE) + #if defined(SINGLE_COPY_POP_ENE) // allocating memory in CPU for evaluation counters size_evals_of_runs = mypars->num_of_runs*sizeof(int); cpu_evals_of_runs = (int*) alignedMalloc(size_evals_of_runs); @@ -386,7 +308,7 @@ filled with clock() */ // allocating memory in CPU for generation counters cpu_gens_of_runs = (int*) alignedMalloc(size_evals_of_runs); memset(cpu_gens_of_runs, 0, size_evals_of_runs); -#endif + #endif //preparing the constant data fields for the GPU // ---------------------------------------------------------------------- @@ -395,11 +317,11 @@ filled with clock() */ // and return them ( = where prepare_const_fields_for_gpu() is called), // so we can send them to Kernels from , instead of from calcenergy.cpp as originally. // ---------------------------------------------------------------------- -#if defined(SINGLE_COPY_POP_ENE) + #if defined(SINGLE_COPY_POP_ENE) if (prepare_conststatic_fields_for_gpu(&myligand_reference, mypars, cpu_ref_ori_angles, &KerConstStatic) == 1) -#else + #else if (prepare_conststatic_fields_for_gpu(&myligand_reference, mypars, &KerConstStatic) == 1) -#endif + #endif return 1; //preparing parameter struct @@ -416,7 +338,6 @@ filled with clock() */ dockpars.rotbondlist_length = ((unsigned int) NUM_OF_THREADS_PER_BLOCK*(myligand_reference.num_of_rotcyc)); dockpars.coeff_elec = ((float) mypars->coeffs.scaled_AD4_coeff_elec); dockpars.coeff_desolv = ((float) mypars->coeffs.AD4_coeff_desolv); - // L30nardoSV added dockpars.num_of_energy_evals = (unsigned int) mypars->num_of_energy_evals; dockpars.num_of_generations = (unsigned int) mypars->num_of_generations; @@ -437,11 +358,6 @@ filled with clock() */ dockpars.qasp = mypars->qasp; dockpars.smooth = mypars->smooth; -/* -// passed correctly -printf("%i %i\n", dockpars.num_of_intraE_contributors, myligand_reference.num_of_intraE_contributors); -*/ - // these variables hold multiplications between kernel-constants // better calculate them here and then pass them to Krnl_GA const float two_absmaxdmov = 2.0 * dockpars.abs_max_dmov; @@ -465,49 +381,36 @@ printf("%i %i\n", dockpars.num_of_intraE_contributors, myligand_reference.num_of //allocating GPU memory for populations, floatgrids, //energies, evaluation counters and random number generator states -//#if defined (FIXED_POINT_INTERE) -#if 0 - size_fixedpt64grids = (sizeof(fixedpt64)) * (mygrid->num_of_atypes+2) * (mygrid->size_xyz[0]) * (mygrid->size_xyz[1]) * (mygrid->size_xyz[2]); -#else #if defined(SEPARATE_FGRID_INTERE) size_floatgrids = sizeof(float) * mygrid->num_of_atypes * mygrid->size_xyz[0]*mygrid->size_xyz[1]* mygrid->size_xyz[2]; size_floatgrids2= sizeof(float) * mygrid->size_xyz[0] * mygrid->size_xyz[1] * mygrid->size_xyz[2]; size_floatgrids3= sizeof(float) * mygrid->size_xyz[0] * mygrid->size_xyz[1] * mygrid->size_xyz[2]; #else - size_floatgrids = (sizeof(float)) * (mygrid->num_of_atypes+2) * (mygrid->size_xyz[0]) * (mygrid->size_xyz[1]) * (mygrid->size_xyz[2]); + size_floatgrids = sizeof(float) * (mygrid->num_of_atypes+2) * (mygrid->size_xyz[0]) * (mygrid->size_xyz[1]) * (mygrid->size_xyz[2]); #endif -#endif -#if defined (FIXED_POINT_INTERE) + #if defined (FIXED_POINT_INTERE) mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ATOMS*sizeof(fixedpt64), &mem_KerConstStatic_fixpt64_atom_charges_const); -#endif + #endif mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ATOMS*sizeof(float), &mem_KerConstStatic_atom_charges_const); - mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ATOMS*sizeof(char), &mem_KerConstStatic_atom_types_const); - -/* - mallocBufferObject(context,CL_MEM_READ_ONLY, 3*MAX_INTRAE_CONTRIBUTORS*sizeof(char), &mem_KerConstStatic_intraE_contributors_const); -*/ - mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_INTRAE_CONTRIBUTORS*sizeof(cl_char3), &mem_KerConstStatic_intraE_contributors_const); - + mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_INTRAE_CONTRIBUTORS*sizeof(cl_char3), &mem_KerConstStatic_intraE_contributors_const); mallocBufferObject(context,CL_MEM_READ_ONLY, ATYPE_NUM*sizeof(float), &mem_KerConstStatic_reqm_const); mallocBufferObject(context,CL_MEM_READ_ONLY, ATYPE_NUM*sizeof(float), &mem_KerConstStatic_reqm_hbond_const); mallocBufferObject(context,CL_MEM_READ_ONLY, ATYPE_NUM*sizeof(unsigned int), &mem_KerConstStatic_atom1_types_reqm_const); mallocBufferObject(context,CL_MEM_READ_ONLY, ATYPE_NUM*sizeof(unsigned int), &mem_KerConstStatic_atom2_types_reqm_const); - mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES*sizeof(float), &mem_KerConstStatic_VWpars_AC_const); mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES*sizeof(float), &mem_KerConstStatic_VWpars_BD_const); mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ATYPES*sizeof(float), &mem_KerConstStatic_dspars_S_const); mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ATYPES*sizeof(float), &mem_KerConstStatic_dspars_V_const); + #if defined (FIXED_POINT_CONFORM) - // fixed-point mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ROTATIONS*sizeof(fixedpt), &mem_KerConstStatic_rotlist_const); mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ATOMS*sizeof(cl_int3), &mem_KerConstStatic_ref_coords_const); mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ROTBONDS*sizeof(cl_int3), &mem_KerConstStatic_rotbonds_moving_vectors_const); mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ROTBONDS*sizeof(cl_int3), &mem_KerConstStatic_rotbonds_unit_vectors_const); mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_RUNS*sizeof(cl_int4), &mem_KerConstStatic_ref_orientation_quats_const); #else - // floating-point (original) mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ROTATIONS*sizeof(int), &mem_KerConstStatic_rotlist_const); mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ATOMS*sizeof(cl_float3), &mem_KerConstStatic_ref_coords_const); mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_ROTBONDS*sizeof(cl_float3), &mem_KerConstStatic_rotbonds_moving_vectors_const); @@ -515,37 +418,33 @@ printf("%i %i\n", dockpars.num_of_intraE_contributors, myligand_reference.num_of mallocBufferObject(context,CL_MEM_READ_ONLY, MAX_NUM_OF_RUNS*sizeof(cl_float4), &mem_KerConstStatic_ref_orientation_quats_const); #endif -//#if defined (FIXED_POINT_INTERE) -#if 0 - mallocBufferObject(context,CL_MEM_READ_ONLY,size_fixedpt64grids, &mem_dockpars_fgrids); -#else mallocBufferObject(context,CL_MEM_READ_ONLY,size_floatgrids, &mem_dockpars_fgrids); #if defined(SEPARATE_FGRID_INTERE) mallocBufferObject(context,CL_MEM_READ_ONLY,size_floatgrids2, &mem_dockpars_fgrids2); mallocBufferObject(context,CL_MEM_READ_ONLY,size_floatgrids3, &mem_dockpars_fgrids3); #endif -#endif mallocBufferObject(context,CL_MEM_READ_WRITE,size_populations, &mem_dockpars_conformations_current); mallocBufferObject(context,CL_MEM_READ_WRITE,size_energies, &mem_dockpars_energies_current); -#if defined(SINGLE_COPY_POP_ENE) + #if defined(SINGLE_COPY_POP_ENE) mallocBufferObject(context,CL_MEM_WRITE_ONLY,size_evals_of_runs, &mem_evals_performed); mallocBufferObject(context,CL_MEM_WRITE_ONLY,size_evals_of_runs, &mem_gens_performed); -#else + #else mallocBufferObject(context,CL_MEM_WRITE_ONLY,2*sizeof(unsigned int), &mem_evals_and_generations_performed); -#endif + #endif -#if defined(SINGLE_COPY_POP_ENE) + #if defined(SINGLE_COPY_POP_ENE) -#else + #else unsigned int array_evals_and_generations_performed [2]; // [0]: evals, [1]: generations -#endif + #endif -#if defined (FIXED_POINT_INTERE) + #if defined (FIXED_POINT_INTERE) memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_fixpt64_atom_charges_const, &KerConstStatic.fixpt64_atom_charges_const[0], MAX_NUM_OF_ATOMS*sizeof(fixedpt64)); -#endif + #endif + memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_atom_charges_const, &KerConstStatic.atom_charges_const[0], MAX_NUM_OF_ATOMS*sizeof(float)); memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_atom_types_const, &KerConstStatic.atom_types_const[0], MAX_NUM_OF_ATOMS*sizeof(char)); memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_intraE_contributors_const, &KerConstStatic.intraE_contributors_const[0], MAX_INTRAE_CONTRIBUTORS*sizeof(cl_char3)); @@ -559,15 +458,14 @@ printf("%i %i\n", dockpars.num_of_intraE_contributors, myligand_reference.num_of memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_VWpars_BD_const, &KerConstStatic.VWpars_BD_const[0], MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES*sizeof(float)); memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_dspars_S_const, &KerConstStatic.dspars_S_const[0], MAX_NUM_OF_ATYPES*sizeof(float)); memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_dspars_V_const, &KerConstStatic.dspars_V_const[0], MAX_NUM_OF_ATYPES*sizeof(float)); + #if defined (FIXED_POINT_CONFORM) - // fixed-point memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_rotlist_const, &KerConstStatic.rotlist_const[0], MAX_NUM_OF_ROTATIONS*sizeof(fixedpt)); memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_ref_coords_const, &KerConstStatic.ref_coords_const[0], MAX_NUM_OF_ATOMS*sizeof(cl_int3)); memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_rotbonds_moving_vectors_const, &KerConstStatic.rotbonds_moving_vectors_const[0], MAX_NUM_OF_ROTBONDS*sizeof(cl_int3)); memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_rotbonds_unit_vectors_const, &KerConstStatic.rotbonds_unit_vectors_const[0], MAX_NUM_OF_ROTBONDS*sizeof(cl_int3)); memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_ref_orientation_quats_const, &KerConstStatic.ref_orientation_quats_const[0], MAX_NUM_OF_RUNS*sizeof(cl_int4)); #else - // floating-point (original) memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_rotlist_const, &KerConstStatic.rotlist_const[0], MAX_NUM_OF_ROTATIONS*sizeof(int)); memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_ref_coords_const, &KerConstStatic.ref_coords_const[0], MAX_NUM_OF_ATOMS*sizeof(cl_float3)); memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_rotbonds_moving_vectors_const, &KerConstStatic.rotbonds_moving_vectors_const[0], MAX_NUM_OF_ROTBONDS*sizeof(cl_float3)); @@ -575,242 +473,198 @@ printf("%i %i\n", dockpars.num_of_intraE_contributors, myligand_reference.num_of memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_ref_orientation_quats_const, &KerConstStatic.ref_orientation_quats_const[0], MAX_NUM_OF_RUNS*sizeof(cl_float4)); #endif -//#if defined (FIXED_POINT_INTERE) -#if 0 -/* - cpu_fixedpt64grids = (fixedpt64*) alignedMalloc((sizeof(fixedpt64))*(mygrid->num_of_atypes+2)* - (mygrid->size_xyz[0])* - (mygrid->size_xyz[1])* - (mygrid->size_xyz[2])); -*/ - cpu_fixedpt64grids = (fixedpt64*) alignedMalloc(size_fixedpt64grids); - - float* mypoi; - fixedpt64* myqoi; - mypoi = cpu_floatgrids; - myqoi = cpu_fixedpt64grids; - - for (int t=0; t < mygrid->num_of_atypes+2; t++) { - //reading values - for (int z=0; z < mygrid->size_xyz[2]; z++) - for (int y=0; y < mygrid->size_xyz[1]; y++) - for (int x=0; x < mygrid->size_xyz[0]; x++) - { - //fscanf(fp, "%f", mypoi); - //mypoi++; - float tmp_grids = *mypoi; - mypoi++; - *myqoi = fixedpt64_fromfloat(tmp_grids); - - // test to prove that it requires 32.32 - // and 16.16 is not enough - //fixedpt myqoi16 = fixedpt_fromfloat(tmp_grids); - //printf("%-10f %-10f %-10f\n", tmp_grids, fixedpt64_tofloat(*myqoi), fixedpt_tofloat(myqoi16)); - - myqoi++; - - - } - } - - memcopyBufferObjectToDevice(command_queue1,mem_dockpars_fgrids, cpu_fixedpt64grids, size_fixedpt64grids); -#else memcopyBufferObjectToDevice(command_queue_ga,mem_dockpars_fgrids, cpu_floatgrids, size_floatgrids); #if defined(SEPARATE_FGRID_INTERE) memcopyBufferObjectToDevice(command_queue_ga,mem_dockpars_fgrids2, cpu_floatgrids + mygrid->num_of_atypes * mygrid->size_xyz[0]*mygrid->size_xyz[1]* mygrid->size_xyz[2], size_floatgrids2); memcopyBufferObjectToDevice(command_queue_ga,mem_dockpars_fgrids3, cpu_floatgrids + (mygrid->num_of_atypes+1) * mygrid->size_xyz[0]*mygrid->size_xyz[1]* mygrid->size_xyz[2], size_floatgrids3); #endif -#endif clock_start_docking = clock(); -#ifdef ENABLE_KRNL_GA + #ifdef ENABLE_KRNL_GA #if defined(SINGLE_COPY_POP_ENE) - setKernelArg(kernel_ga,0, sizeof(mem_dockpars_conformations_current), &mem_dockpars_conformations_current); - setKernelArg(kernel_ga,1, sizeof(mem_dockpars_energies_current), &mem_dockpars_energies_current); - setKernelArg(kernel_ga,2, sizeof(mem_evals_performed), &mem_evals_performed); - setKernelArg(kernel_ga,3, sizeof(mem_gens_performed), &mem_gens_performed); - setKernelArg(kernel_ga,4, sizeof(unsigned int), &dockpars.pop_size); - setKernelArg(kernel_ga,5, sizeof(unsigned int), &dockpars.num_of_energy_evals); - setKernelArg(kernel_ga,6, sizeof(unsigned int), &dockpars.num_of_generations); - setKernelArg(kernel_ga,7, sizeof(float), &dockpars.tournament_rate); - setKernelArg(kernel_ga,8, sizeof(float), &dockpars.mutation_rate); - setKernelArg(kernel_ga,9, sizeof(float), &dockpars.abs_max_dmov); - setKernelArg(kernel_ga,10, sizeof(float), &dockpars.abs_max_dang); - setKernelArg(kernel_ga,11, sizeof(float), &two_absmaxdmov); - setKernelArg(kernel_ga,12, sizeof(float), &two_absmaxdang); - setKernelArg(kernel_ga,13, sizeof(float), &dockpars.crossover_rate); - setKernelArg(kernel_ga,14, sizeof(unsigned int), &dockpars.num_of_lsentities); - setKernelArg(kernel_ga,15, sizeof(unsigned char), &dockpars.num_of_genes); + setKernelArg(kernel_ga,0, sizeof(cl_mem), &mem_dockpars_conformations_current); + setKernelArg(kernel_ga,1, sizeof(cl_mem), &mem_dockpars_energies_current); + setKernelArg(kernel_ga,2, sizeof(cl_mem), &mem_evals_performed); + setKernelArg(kernel_ga,3, sizeof(cl_mem), &mem_gens_performed); + setKernelArg(kernel_ga,4, sizeof(unsigned int), &dockpars.pop_size); + setKernelArg(kernel_ga,5, sizeof(unsigned int), &dockpars.num_of_energy_evals); + setKernelArg(kernel_ga,6, sizeof(unsigned int), &dockpars.num_of_generations); + setKernelArg(kernel_ga,7, sizeof(float), &dockpars.tournament_rate); + setKernelArg(kernel_ga,8, sizeof(float), &dockpars.mutation_rate); + setKernelArg(kernel_ga,9, sizeof(float), &dockpars.abs_max_dmov); + setKernelArg(kernel_ga,10, sizeof(float), &dockpars.abs_max_dang); + setKernelArg(kernel_ga,11, sizeof(float), &two_absmaxdmov); + setKernelArg(kernel_ga,12, sizeof(float), &two_absmaxdang); + setKernelArg(kernel_ga,13, sizeof(float), &dockpars.crossover_rate); + setKernelArg(kernel_ga,14, sizeof(unsigned int), &dockpars.num_of_lsentities); + setKernelArg(kernel_ga,15, sizeof(unsigned char), &dockpars.num_of_genes); #else - setKernelArg(kernel_ga,0, sizeof(mem_dockpars_conformations_current), &mem_dockpars_conformations_current); - setKernelArg(kernel_ga,1, sizeof(mem_dockpars_energies_current), &mem_dockpars_energies_current); - setKernelArg(kernel_ga,2, sizeof(mem_evals_and_generations_performed), &mem_evals_and_generations_performed); - setKernelArg(kernel_ga,3, sizeof(unsigned int), &dockpars.pop_size); - setKernelArg(kernel_ga,4, sizeof(unsigned int), &dockpars.num_of_energy_evals); - setKernelArg(kernel_ga,5, sizeof(unsigned int), &dockpars.num_of_generations); - setKernelArg(kernel_ga,6, sizeof(float), &dockpars.tournament_rate); - setKernelArg(kernel_ga,7, sizeof(float), &dockpars.mutation_rate); - setKernelArg(kernel_ga,8, sizeof(float), &dockpars.abs_max_dmov); - setKernelArg(kernel_ga,9, sizeof(float), &dockpars.abs_max_dang); - setKernelArg(kernel_ga,10, sizeof(float), &two_absmaxdmov); - setKernelArg(kernel_ga,11, sizeof(float), &two_absmaxdang); - setKernelArg(kernel_ga,12, sizeof(float), &dockpars.crossover_rate); - setKernelArg(kernel_ga,13, sizeof(unsigned int), &dockpars.num_of_lsentities); - setKernelArg(kernel_ga,14, sizeof(unsigned char), &dockpars.num_of_genes); + setKernelArg(kernel_ga,0, sizeof(cl_mem), &mem_dockpars_conformations_current); + setKernelArg(kernel_ga,1, sizeof(cl_mem), &mem_dockpars_energies_current); + setKernelArg(kernel_ga,2, sizeof(cl_mem), &mem_evals_and_generations_performed); + setKernelArg(kernel_ga,3, sizeof(unsigned int), &dockpars.pop_size); + setKernelArg(kernel_ga,4, sizeof(unsigned int), &dockpars.num_of_energy_evals); + setKernelArg(kernel_ga,5, sizeof(unsigned int), &dockpars.num_of_generations); + setKernelArg(kernel_ga,6, sizeof(float), &dockpars.tournament_rate); + setKernelArg(kernel_ga,7, sizeof(float), &dockpars.mutation_rate); + setKernelArg(kernel_ga,8, sizeof(float), &dockpars.abs_max_dmov); + setKernelArg(kernel_ga,9, sizeof(float), &dockpars.abs_max_dang); + setKernelArg(kernel_ga,10, sizeof(float), &two_absmaxdmov); + setKernelArg(kernel_ga,11, sizeof(float), &two_absmaxdang); + setKernelArg(kernel_ga,12, sizeof(float), &dockpars.crossover_rate); + setKernelArg(kernel_ga,13, sizeof(unsigned int), &dockpars.num_of_lsentities); + setKernelArg(kernel_ga,14, sizeof(unsigned char), &dockpars.num_of_genes); + #endif #endif -#endif -#ifdef ENABLE_KRNL_CONFORM - setKernelArg(kernel_conform,0, sizeof(mem_KerConstStatic_rotlist_const), &mem_KerConstStatic_rotlist_const); - setKernelArg(kernel_conform,1, sizeof(mem_KerConstStatic_ref_coords_const), &mem_KerConstStatic_ref_coords_const); - setKernelArg(kernel_conform,2, sizeof(mem_KerConstStatic_rotbonds_moving_vectors_const), &mem_KerConstStatic_rotbonds_moving_vectors_const); - setKernelArg(kernel_conform,3, sizeof(mem_KerConstStatic_rotbonds_unit_vectors_const), &mem_KerConstStatic_rotbonds_unit_vectors_const); - setKernelArg(kernel_conform,4, sizeof(unsigned int), &dockpars.rotbondlist_length); - setKernelArg(kernel_conform,5, sizeof(unsigned char), &dockpars.num_of_atoms); - setKernelArg(kernel_conform,6, sizeof(unsigned char), &dockpars.num_of_genes); - setKernelArg(kernel_conform,7, sizeof(mem_KerConstStatic_ref_orientation_quats_const), &mem_KerConstStatic_ref_orientation_quats_const); + #ifdef ENABLE_KRNL_CONFORM + setKernelArg(kernel_conform,0, sizeof(cl_mem), &mem_KerConstStatic_rotlist_const); + setKernelArg(kernel_conform,1, sizeof(cl_mem), &mem_KerConstStatic_ref_coords_const); + setKernelArg(kernel_conform,2, sizeof(cl_mem), &mem_KerConstStatic_rotbonds_moving_vectors_const); + setKernelArg(kernel_conform,3, sizeof(cl_mem), &mem_KerConstStatic_rotbonds_unit_vectors_const); + setKernelArg(kernel_conform,4, sizeof(unsigned int), &dockpars.rotbondlist_length); + setKernelArg(kernel_conform,5, sizeof(unsigned char), &dockpars.num_of_atoms); + setKernelArg(kernel_conform,6, sizeof(unsigned char), &dockpars.num_of_genes); + setKernelArg(kernel_conform,7, sizeof(cl_mem), &mem_KerConstStatic_ref_orientation_quats_const); #if defined(SINGLE_COPY_POP_ENE) #else #if defined (FIXED_POINT_CONFORM) - // fixed-point setKernelArg(kernel_conform,8, sizeof(fixedpt), &KerConstDynamic.ref_orientation_quats_const[0]); setKernelArg(kernel_conform,9, sizeof(fixedpt), &KerConstDynamic.ref_orientation_quats_const[1]); setKernelArg(kernel_conform,10, sizeof(fixedpt), &KerConstDynamic.ref_orientation_quats_const[2]); setKernelArg(kernel_conform,11, sizeof(fixedpt), &KerConstDynamic.ref_orientation_quats_const[3]); #else - // floating-point (original) setKernelArg(kernel_conform,8, sizeof(float), &KerConstDynamic.ref_orientation_quats_const[0]); setKernelArg(kernel_conform,9, sizeof(float), &KerConstDynamic.ref_orientation_quats_const[1]); setKernelArg(kernel_conform,10, sizeof(float), &KerConstDynamic.ref_orientation_quats_const[2]); setKernelArg(kernel_conform,11, sizeof(float), &KerConstDynamic.ref_orientation_quats_const[3]); #endif #endif -#endif - + #endif unsigned char gridsizex_minus1 = dockpars.gridsize_x - 1; unsigned char gridsizey_minus1 = dockpars.gridsize_y - 1; unsigned char gridsizez_minus1 = dockpars.gridsize_z - 1; -#if defined (FIXED_POINT_INTERE) + #if defined (FIXED_POINT_INTERE) -#else + #else float fgridsizex_minus1 = (float) gridsizex_minus1; float fgridsizey_minus1 = (float) gridsizey_minus1; float fgridsizez_minus1 = (float) gridsizez_minus1; + #endif -#endif - + #ifdef ENABLE_KRNL_INTERE -#ifdef ENABLE_KRNL_INTERE - setKernelArg(kernel_intere,0, sizeof(mem_dockpars_fgrids), &mem_dockpars_fgrids); + setKernelArg(kernel_intere,0, sizeof(cl_mem), &mem_dockpars_fgrids); #if defined (FIXED_POINT_INTERE) - setKernelArg(kernel_intere,1, sizeof(mem_KerConstStatic_fixpt64_atom_charges_const), &mem_KerConstStatic_fixpt64_atom_charges_const); + setKernelArg(kernel_intere,1, sizeof(cl_mem), &mem_KerConstStatic_fixpt64_atom_charges_const); #else - setKernelArg(kernel_intere,1, sizeof(mem_KerConstStatic_atom_charges_const), &mem_KerConstStatic_atom_charges_const); + setKernelArg(kernel_intere,1, sizeof(cl_mem), &mem_KerConstStatic_atom_charges_const); #endif - setKernelArg(kernel_intere,2, sizeof(mem_KerConstStatic_atom_types_const), &mem_KerConstStatic_atom_types_const); - setKernelArg(kernel_intere,3, sizeof(unsigned char), &dockpars.g1); - setKernelArg(kernel_intere,4, sizeof(unsigned int), &dockpars.g2); - setKernelArg(kernel_intere,5, sizeof(unsigned int), &dockpars.g3); - setKernelArg(kernel_intere,6, sizeof(unsigned char), &dockpars.num_of_atoms); + setKernelArg(kernel_intere,2, sizeof(cl_mem), &mem_KerConstStatic_atom_types_const); + setKernelArg(kernel_intere,3, sizeof(unsigned char), &dockpars.g1); + setKernelArg(kernel_intere,4, sizeof(unsigned int), &dockpars.g2); + setKernelArg(kernel_intere,5, sizeof(unsigned int), &dockpars.g3); + setKernelArg(kernel_intere,6, sizeof(unsigned char), &dockpars.num_of_atoms); #if defined (FIXED_POINT_INTERE) - setKernelArg(kernel_intere,7, sizeof(unsigned char), &gridsizex_minus1); - setKernelArg(kernel_intere,8, sizeof(unsigned char), &gridsizey_minus1); - setKernelArg(kernel_intere,9, sizeof(unsigned char), &gridsizez_minus1); + setKernelArg(kernel_intere,7, sizeof(unsigned char), &gridsizex_minus1); + setKernelArg(kernel_intere,8, sizeof(unsigned char), &gridsizey_minus1); + setKernelArg(kernel_intere,9, sizeof(unsigned char), &gridsizez_minus1); #else - setKernelArg(kernel_intere,7, sizeof(float), &fgridsizex_minus1); - setKernelArg(kernel_intere,8, sizeof(float), &fgridsizey_minus1); - setKernelArg(kernel_intere,9, sizeof(float), &fgridsizez_minus1); + setKernelArg(kernel_intere,7, sizeof(float), &fgridsizex_minus1); + setKernelArg(kernel_intere,8, sizeof(float), &fgridsizey_minus1); + setKernelArg(kernel_intere,9, sizeof(float), &fgridsizez_minus1); #endif #if defined(SEPARATE_FGRID_INTERE) - setKernelArg(kernel_intere,10, sizeof(mem_dockpars_fgrids2), &mem_dockpars_fgrids2); - setKernelArg(kernel_intere,11, sizeof(mem_dockpars_fgrids3), &mem_dockpars_fgrids3); + setKernelArg(kernel_intere,10, sizeof(cl_mem), &mem_dockpars_fgrids2); + setKernelArg(kernel_intere,11, sizeof(cl_mem), &mem_dockpars_fgrids3); #else - setKernelArg(kernel_intere,10, sizeof(unsigned int), &mul_tmp2); - setKernelArg(kernel_intere,11, sizeof(unsigned int), &mul_tmp3); + setKernelArg(kernel_intere,10, sizeof(unsigned int), &mul_tmp2); + setKernelArg(kernel_intere,11, sizeof(unsigned int), &mul_tmp3); #endif -#endif -#ifdef ENABLE_KRNL_INTRAE - setKernelArg(kernel_intrae,0, sizeof(mem_KerConstStatic_atom_charges_const), &mem_KerConstStatic_atom_charges_const); - setKernelArg(kernel_intrae,1, sizeof(mem_KerConstStatic_atom_types_const), &mem_KerConstStatic_atom_types_const); - setKernelArg(kernel_intrae,2, sizeof(mem_KerConstStatic_intraE_contributors_const), &mem_KerConstStatic_intraE_contributors_const); - setKernelArg(kernel_intrae,3, sizeof(dockpars.smooth), &dockpars.smooth); - setKernelArg(kernel_intrae,4, sizeof(mem_KerConstStatic_reqm_const), &mem_KerConstStatic_reqm_const); - setKernelArg(kernel_intrae,5, sizeof(mem_KerConstStatic_reqm_hbond_const), &mem_KerConstStatic_reqm_hbond_const); - setKernelArg(kernel_intrae,6, sizeof(mem_KerConstStatic_atom1_types_reqm_const), &mem_KerConstStatic_atom1_types_reqm_const); - setKernelArg(kernel_intrae,7, sizeof(mem_KerConstStatic_atom2_types_reqm_const), &mem_KerConstStatic_atom2_types_reqm_const); - setKernelArg(kernel_intrae,8, sizeof(mem_KerConstStatic_VWpars_AC_const), &mem_KerConstStatic_VWpars_AC_const); - setKernelArg(kernel_intrae,9, sizeof(mem_KerConstStatic_VWpars_BD_const), &mem_KerConstStatic_VWpars_BD_const); - setKernelArg(kernel_intrae,10, sizeof(mem_KerConstStatic_dspars_S_const), &mem_KerConstStatic_dspars_S_const); - setKernelArg(kernel_intrae,11, sizeof(mem_KerConstStatic_dspars_V_const), &mem_KerConstStatic_dspars_V_const); - setKernelArg(kernel_intrae,12, sizeof(unsigned char), &dockpars.num_of_atoms); - setKernelArg(kernel_intrae,13, sizeof(unsigned int), &dockpars.num_of_intraE_contributors); - setKernelArg(kernel_intrae,14, sizeof(float), &dockpars.grid_spacing); - setKernelArg(kernel_intrae,15, sizeof(unsigned char), &dockpars.num_of_atypes); - setKernelArg(kernel_intrae,16, sizeof(float), &dockpars.coeff_elec); - setKernelArg(kernel_intrae,17, sizeof(float), &dockpars.qasp); - setKernelArg(kernel_intrae,18, sizeof(float), &dockpars.coeff_desolv); -#endif + #endif -#ifdef ENABLE_KRNL_PRNG_BT_USHORT_FLOAT + #ifdef ENABLE_KRNL_INTRAE + setKernelArg(kernel_intrae,0, sizeof(cl_mem), &mem_KerConstStatic_atom_charges_const); + setKernelArg(kernel_intrae,1, sizeof(cl_mem), &mem_KerConstStatic_atom_types_const); + setKernelArg(kernel_intrae,2, sizeof(cl_mem), &mem_KerConstStatic_intraE_contributors_const); + setKernelArg(kernel_intrae,3, sizeof(dockpars.smooth), &dockpars.smooth); + setKernelArg(kernel_intrae,4, sizeof(cl_mem), &mem_KerConstStatic_reqm_const); + setKernelArg(kernel_intrae,5, sizeof(cl_mem), &mem_KerConstStatic_reqm_hbond_const); + setKernelArg(kernel_intrae,6, sizeof(cl_mem), &mem_KerConstStatic_atom1_types_reqm_const); + setKernelArg(kernel_intrae,7, sizeof(cl_mem), &mem_KerConstStatic_atom2_types_reqm_const); + setKernelArg(kernel_intrae,8, sizeof(cl_mem), &mem_KerConstStatic_VWpars_AC_const); + setKernelArg(kernel_intrae,9, sizeof(cl_mem), &mem_KerConstStatic_VWpars_BD_const); + setKernelArg(kernel_intrae,10, sizeof(cl_mem), &mem_KerConstStatic_dspars_S_const); + setKernelArg(kernel_intrae,11, sizeof(cl_mem), &mem_KerConstStatic_dspars_V_const); + setKernelArg(kernel_intrae,12, sizeof(unsigned char), &dockpars.num_of_atoms); + setKernelArg(kernel_intrae,13, sizeof(unsigned int), &dockpars.num_of_intraE_contributors); + setKernelArg(kernel_intrae,14, sizeof(float), &dockpars.grid_spacing); + setKernelArg(kernel_intrae,15, sizeof(unsigned char), &dockpars.num_of_atypes); + setKernelArg(kernel_intrae,16, sizeof(float), &dockpars.coeff_elec); + setKernelArg(kernel_intrae,17, sizeof(float), &dockpars.qasp); + setKernelArg(kernel_intrae,18, sizeof(float), &dockpars.coeff_desolv); + #endif + + #ifdef ENABLE_KRNL_PRNG_BT_USHORT_FLOAT setKernelArg(kernel_prng_bt_ushort_float,2, sizeof(unsigned int), &dockpars.pop_size); -#endif + #endif -#ifdef ENABLE_KRNL_PRNG_GG_UCHAR + #ifdef ENABLE_KRNL_PRNG_GG_UCHAR setKernelArg(kernel_prng_gg_uchar,1, sizeof(unsigned char), &dockpars.num_of_genes); -#endif + #endif -#ifdef ENABLE_KRNL_PRNG_GG_FLOAT + #ifdef ENABLE_KRNL_PRNG_GG_FLOAT setKernelArg(kernel_prng_gg_float,1, sizeof(unsigned char), &dockpars.num_of_genes); -#endif + #endif -#ifdef ENABLE_KRNL_PRNG_LS_FLOAT + #ifdef ENABLE_KRNL_PRNG_LS_FLOAT setKernelArg(kernel_prng_ls_float,1, sizeof(unsigned char), &dockpars.num_of_genes); -#endif + #endif -#if defined (FIXED_POINT_LS) -fixedpt fixpt_base_dmov_mul_sqrt3 = fixedpt_fromfloat(dockpars.base_dmov_mul_sqrt3); -fixedpt fixpt_base_dang_mul_sqrt3 = fixedpt_fromfloat(dockpars.base_dang_mul_sqrt3); -fixedpt fixpt_rho_lower_bound = fixedpt_fromfloat(dockpars.rho_lower_bound); -#endif + #if defined (FIXED_POINT_LS) + fixedpt fixpt_base_dmov_mul_sqrt3 = fixedpt_fromfloat(dockpars.base_dmov_mul_sqrt3); + fixedpt fixpt_base_dang_mul_sqrt3 = fixedpt_fromfloat(dockpars.base_dang_mul_sqrt3); + fixedpt fixpt_rho_lower_bound = fixedpt_fromfloat(dockpars.rho_lower_bound); + #endif -unsigned short Host_max_num_of_iters = (unsigned short)dockpars.max_num_of_iters; -unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; + unsigned short Host_max_num_of_iters = (unsigned short)dockpars.max_num_of_iters; + unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; -#ifdef ENABLE_KRNL_LS - setKernelArg(kernel_ls,0, sizeof(unsigned short), &Host_max_num_of_iters); + #ifdef ENABLE_KRNL_LS + setKernelArg(kernel_ls,0, sizeof(unsigned short), &Host_max_num_of_iters); #if defined (FIXED_POINT_LS) - setKernelArg(kernel_ls,1, sizeof(fixedpt), &fixpt_rho_lower_bound); - setKernelArg(kernel_ls,2, sizeof(fixedpt), &fixpt_base_dmov_mul_sqrt3); + setKernelArg(kernel_ls,1, sizeof(fixedpt), &fixpt_rho_lower_bound); + setKernelArg(kernel_ls,2, sizeof(fixedpt), &fixpt_base_dmov_mul_sqrt3); #else - setKernelArg(kernel_ls,1, sizeof(float), &dockpars.rho_lower_bound); - setKernelArg(kernel_ls,2, sizeof(float), &dockpars.base_dmov_mul_sqrt3); + setKernelArg(kernel_ls,1, sizeof(float), &dockpars.rho_lower_bound); + setKernelArg(kernel_ls,2, sizeof(float), &dockpars.base_dmov_mul_sqrt3); #endif - setKernelArg(kernel_ls,3, sizeof(unsigned char), &dockpars.num_of_genes); + setKernelArg(kernel_ls,3, sizeof(unsigned char), &dockpars.num_of_genes); #if defined (FIXED_POINT_LS) - setKernelArg(kernel_ls,4, sizeof(fixedpt), &fixpt_base_dang_mul_sqrt3); + setKernelArg(kernel_ls,4, sizeof(fixedpt), &fixpt_base_dang_mul_sqrt3); #else - setKernelArg(kernel_ls,4, sizeof(float), &dockpars.base_dang_mul_sqrt3); + setKernelArg(kernel_ls,4, sizeof(float), &dockpars.base_dang_mul_sqrt3); #endif - setKernelArg(kernel_ls,5, sizeof(unsigned char), &Host_cons_limit); -#endif + setKernelArg(kernel_ls,5, sizeof(unsigned char), &Host_cons_limit); + #endif -#ifdef ENABLE_KRNL_PRNG_LS123_USHORT + #ifdef ENABLE_KRNL_PRNG_LS123_USHORT setKernelArg(kernel_prng_ls123_ushort,9, sizeof(unsigned int), &dockpars.pop_size); -#endif + #endif -#if defined(SINGLE_COPY_POP_ENE) + #if defined(SINGLE_COPY_POP_ENE) memcopyBufferObjectToDevice(command_queue_ga,mem_dockpars_conformations_current, cpu_init_populations, size_populations); -#endif + #endif #if 0 // ********************************************** @@ -886,10 +740,6 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; for (unsigned int run_cnt = 0; run_cnt < mypars->num_of_runs; run_cnt++) { -/* - printf("Run %3u started ... \n", run_cnt+1); - fflush(stdout); -*/ if (mypars->power == 1) { printf(" %-4u", run_cnt+1); } @@ -911,9 +761,9 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; // ********************************************** #endif -#if defined(SINGLE_COPY_POP_ENE) + #if defined(SINGLE_COPY_POP_ENE) -#else + #else myligand_reference = *myligand_init; gen_initpop_and_reflig(mypars, cpu_init_populations, cpu_ref_ori_angles, &myligand_reference, mygrid); @@ -921,37 +771,36 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; return 1; memcopyBufferObjectToDevice(command_queue1,mem_dockpars_conformations_current, cpu_init_populations, size_populations); -#endif + #endif -#if defined(SINGLE_COPY_POP_ENE) - #ifdef ENABLE_KRNL_GA + #if defined(SINGLE_COPY_POP_ENE) + #ifdef ENABLE_KRNL_GA unsigned int Host_Offset_Pop = run_cnt * dockpars.pop_size * ACTUAL_GENOTYPE_LENGTH; unsigned int Host_Offset_Ene = run_cnt * dockpars.pop_size; setKernelArg(kernel_ga,16, sizeof(unsigned short), &run_cnt); setKernelArg(kernel_ga,17, sizeof(unsigned int), &Host_Offset_Pop); setKernelArg(kernel_ga,18, sizeof(unsigned int), &Host_Offset_Ene); + #endif + #endif - #endif -#endif - -#ifdef ENABLE_KRNL_CONFORM - setKernelArg(kernel_conform,8, sizeof(unsigned short), &run_cnt); -#endif + #ifdef ENABLE_KRNL_CONFORM + setKernelArg(kernel_conform,8, sizeof(unsigned short), &run_cnt); + #endif -#ifdef ENABLE_KRNL_PRNG_BT_USHORT_FLOAT + #ifdef ENABLE_KRNL_PRNG_BT_USHORT_FLOAT setKernelArg(kernel_prng_bt_ushort_float,0, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt + 14]); setKernelArg(kernel_prng_bt_ushort_float,1, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt + 15]); -#endif + #endif -#ifdef ENABLE_KRNL_PRNG_GG_UCHAR + #ifdef ENABLE_KRNL_PRNG_GG_UCHAR setKernelArg(kernel_prng_gg_uchar,0, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt + 2]); -#endif + #endif -#ifdef ENABLE_KRNL_PRNG_GG_FLOAT + #ifdef ENABLE_KRNL_PRNG_GG_FLOAT setKernelArg(kernel_prng_gg_float,0, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt]); -#endif + #endif -#ifdef ENABLE_KRNL_PRNG_LS123_USHORT + #ifdef ENABLE_KRNL_PRNG_LS123_USHORT setKernelArg(kernel_prng_ls123_ushort,0, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt + 5]); setKernelArg(kernel_prng_ls123_ushort,1, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt + 6]); setKernelArg(kernel_prng_ls123_ushort,2, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt + 7]); @@ -961,11 +810,11 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; setKernelArg(kernel_prng_ls123_ushort,6, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt + 11]); setKernelArg(kernel_prng_ls123_ushort,7, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt + 12]); setKernelArg(kernel_prng_ls123_ushort,8, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt + 13]); -#endif + #endif -#ifdef ENABLE_KRNL_PRNG_LS_FLOAT + #ifdef ENABLE_KRNL_PRNG_LS_FLOAT setKernelArg(kernel_prng_ls_float,0, sizeof(unsigned int), &cpu_prng_seeds[num_of_prng_blocks * run_cnt + 1]); -#endif + #endif #ifdef ENABLE_KRNL_GA runKernelTask(command_queue_ga,kernel_ga,NULL,NULL); @@ -1057,75 +906,20 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; clock_stop_docking = clock(); -#if defined(SINGLE_COPY_POP_ENE) - - -#else - /* - fflush(stdout); - */ + #if defined(SINGLE_COPY_POP_ENE) + #else //copy results from device memcopyBufferObjectFromDevice(command_queue1,array_evals_and_generations_performed,mem_evals_and_generations_performed,2*sizeof(unsigned int)); mypars->num_of_energy_evals = array_evals_and_generations_performed [0]; mypars->num_of_generations = array_evals_and_generations_performed [1]; memcopyBufferObjectFromDevice(command_queue1,cpu_final_populations,mem_dockpars_conformations_current,size_populations); - memcopyBufferObjectFromDevice(command_queue1,cpu_energies,mem_dockpars_energies_current,size_energies); - - //processing results - -/* - // Fix genotypes so map angle is used for genotypes 3,4,5 - // Check what format is used by host regarding the angles - for (int ent_cnt=0; ent_cntpop_size; ent_cnt++) { - - float temp_genotype[ACTUAL_GENOTYPE_LENGTH]; - memcpy(temp_genotype, cpu_final_populations+ent_cnt*ACTUAL_GENOTYPE_LENGTH, ACTUAL_GENOTYPE_LENGTH*sizeof(float)); - - for (int gene_cnt=3; gene_cntpop_size*GENOTYPE_LENGTH_IN_GLOBMEM, - cpu_energies+run_cnt*mypars->pop_size, mypars->pop_size); -*/ - arrange_result(cpu_final_populations, cpu_energies, mypars->pop_size); - -/* - make_resfiles(cpu_final_populations+run_cnt*mypars->pop_size*GENOTYPE_LENGTH_IN_GLOBMEM, - cpu_energies+run_cnt*mypars->pop_size, - &myligand_reference, - myligand_init, - mypars, - cpu_evals_of_runs[run_cnt], - generation_cnt, - mygrid, - cpu_floatgrids, - cpu_ref_ori_angles+3*run_cnt, - argc, - argv, - 0, - run_cnt, - &(cpu_result_ligands [run_cnt])); -*/ - //To write out final_population generated by get_result make_resfiles(cpu_final_populations, cpu_energies, @@ -1142,17 +936,13 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; 0, run_cnt, &cpu_result_ligands[run_cnt]); - - -#endif - + #endif } // End of for (run_cnt = 0; run_cnt < mypars->num_of_runs; run_cnt++) - printf("\n"); -#if defined(SINGLE_COPY_POP_ENE) + #if defined(SINGLE_COPY_POP_ENE) //copy results from device memcopyBufferObjectFromDevice(command_queue_ga, cpu_evals_of_runs, mem_evals_performed, size_evals_of_runs); @@ -1161,7 +951,6 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; memcopyBufferObjectFromDevice(command_queue_ga,cpu_final_populations,mem_dockpars_conformations_current,size_populations); memcopyBufferObjectFromDevice(command_queue_ga,cpu_energies,mem_dockpars_energies_current,size_energies); - for (unsigned int run_cnt = 0; run_cnt < mypars->num_of_runs; run_cnt++) { arrange_result(cpu_final_populations+run_cnt*mypars->pop_size*ACTUAL_GENOTYPE_LENGTH, @@ -1184,17 +973,7 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; run_cnt, &(cpu_result_ligands [run_cnt])); } // End of for (run_cnt = 0; run_cnt < mypars->num_of_runs; run_cnt++) - -#endif - - - - - - - - - + #endif /* #if defined (DOCK_DEBUG) @@ -1206,20 +985,8 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; #endif */ - - - - - - - - - - - clock_stop_program_before_clustering = clock(); - clusanal_gendlg(cpu_result_ligands, mypars->num_of_runs, myligand_init, mypars, @@ -1229,287 +996,262 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit; ELAPSEDSECS(clock_stop_docking, clock_start_docking)/mypars->num_of_runs, ELAPSEDSECS(clock_stop_program_before_clustering, clock_start_program)); - - - - clock_stop_docking = clock(); - - - - - // Free the resources allocated cleanup(); return 0; } - //// -------------------------------- //// Altera OpenCL Helper Functions //// -------------------------------- bool init() { - cl_int status; - - if(!setCwdToExeDir()) { - return false; - } - - // Get the OpenCL platform. - platform = findPlatform("Intel(R) FPGA"); // use it from aoc v16.1 - //platform = findPlatform("Altera SDK"); // works for harp2, i.e. v16.0 patched - if(platform == NULL) { - printf("ERROR: Unable to find Intel(R) FPGA OpenCL platform.\n"); - return false; - } - - // User-visible output - Platform information - { - char char_buffer[STRING_BUFFER_LEN]; - printf("Querying platform for info:\n"); - printf("==========================\n"); - clGetPlatformInfo(platform, CL_PLATFORM_NAME, STRING_BUFFER_LEN, char_buffer, NULL); - printf("%-40s = %s\n", "CL_PLATFORM_NAME", char_buffer); - clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, STRING_BUFFER_LEN, char_buffer, NULL); - printf("%-40s = %s\n", "CL_PLATFORM_VENDOR ", char_buffer); - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, STRING_BUFFER_LEN, char_buffer, NULL); - printf("%-40s = %s\n\n", "CL_PLATFORM_VERSION ", char_buffer); - } - - // Query the available OpenCL devices. - scoped_array devices; - cl_uint num_devices; - - devices.reset(getDevices(platform, CL_DEVICE_TYPE_ALL, &num_devices)); - - // We'll just use the first device. - device = devices[0]; - - // Display some device information. - display_device_info(device); - - // Create the context. - context = clCreateContext(NULL, 1, &device, &oclContextCallback, NULL, &status); - checkError(status, "Failed to create context"); - - // Create the command queue. - //queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); - //command_queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status); - //checkError(status, "Failed to create command queue"); - - // Create the program. -/* - std::string binary_file = getBoardBinaryFile("docking", device); -*/ - std::string binary_file = getBoardBinaryFile("Krnl_GA", device); - printf("Using AOCX: %s\n", binary_file.c_str()); - program = createProgramFromBinary(context, binary_file.c_str(), &device, 1); + cl_int status; + if(!setCwdToExeDir()) { + return false; + } - // Build the program that was just created. - status = clBuildProgram(program, 0, NULL, "", NULL, NULL); - checkError(status, "Failed to build program"); + // Get the OpenCL platform. + platform = findPlatform("Intel(R) FPGA"); // use it from aoc v16.1 + if(platform == NULL) { + printf("ERROR: Unable to find Intel(R) FPGA OpenCL platform.\n"); + return false; + } + // User-visible output - Platform information + { + char char_buffer[STRING_BUFFER_LEN]; + printf("Querying platform for info:\n"); + printf("==========================\n"); + clGetPlatformInfo(platform, CL_PLATFORM_NAME, STRING_BUFFER_LEN, char_buffer, NULL); + printf("%-40s = %s\n", "CL_PLATFORM_NAME", char_buffer); + clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, STRING_BUFFER_LEN, char_buffer, NULL); + printf("%-40s = %s\n", "CL_PLATFORM_VENDOR ", char_buffer); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, STRING_BUFFER_LEN, char_buffer, NULL); + printf("%-40s = %s\n\n", "CL_PLATFORM_VERSION ", char_buffer); + } - // Create the kernel - name passed in here must match kernel name in the - // original CL file, that was compiled into an AOCX file using the AOC tool -#ifdef ENABLE_KRNL_GA - command_queue_ga = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue ga"); - kernel_ga = clCreateKernel(program, name_krnl_ga, &status); - checkError(status, "Failed to create kernel ga"); -#endif + // Query the available OpenCL devices. + scoped_array devices; + cl_uint num_devices; -#ifdef ENABLE_KRNL_CONFORM - command_queue_conform = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue conform"); - kernel_conform = clCreateKernel(program, name_krnl_conform, &status); - checkError(status, "Failed to create kernel conform"); -#endif + devices.reset(getDevices(platform, CL_DEVICE_TYPE_ALL, &num_devices)); -#ifdef ENABLE_KRNL_INTERE - command_queue_intere = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue intere"); - kernel_intere = clCreateKernel(program, name_krnl_intere, &status); - checkError(status, "Failed to create kernel intere"); -#endif + // We'll just use the first device. + device = devices[0]; -#ifdef ENABLE_KRNL_INTRAE - command_queue_intrae = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue intrae"); - kernel_intrae = clCreateKernel(program, name_krnl_intrae, &status); - checkError(status, "Failed to create kernel"); -#endif + // Display some device information. + display_device_info(device); -#ifdef ENABLE_KRNL_PRNG_BT_USHORT_FLOAT - command_queue_prng_bt_ushort_float = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue prng_bt_ushort_float"); - kernel_prng_bt_ushort_float = clCreateKernel(program, name_krnl_prng_bt_ushort_float, &status); - checkError(status, "Failed to create kernel prng_bt_ushort_float"); -#endif + // Create the context. + context = clCreateContext(NULL, 1, &device, &oclContextCallback, NULL, &status); + checkError(status, "Failed to create context"); -#ifdef ENABLE_KRNL_PRNG_GG_UCHAR - command_queue_prng_gg_uchar = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue prng_gg_uchar"); - kernel_prng_gg_uchar = clCreateKernel(program, name_krnl_prng_gg_uchar, &status); - checkError(status, "Failed to create kernel prng_gg_uchar"); -#endif + // Create the program. + std::string binary_file = getBoardBinaryFile("Krnl_GA", device); + printf("Using AOCX: %s\n", binary_file.c_str()); + program = createProgramFromBinary(context, binary_file.c_str(), &device, 1); -#ifdef ENABLE_KRNL_PRNG_GG_FLOAT - command_queue_prng_gg_float = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue prng_gg_float"); - kernel_prng_gg_float = clCreateKernel(program, name_krnl_prng_gg_float, &status); - checkError(status, "Failed to create kernel prng_gg_float"); -#endif + // Build the program that was just created. + status = clBuildProgram(program, 0, NULL, "", NULL, NULL); + checkError(status, "Failed to build program"); -#ifdef ENABLE_KRNL_PRNG_LS123_USHORT - command_queue_prng_ls123_ushort = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue prng_ls123_ushort"); - kernel_prng_ls123_ushort = clCreateKernel(program, name_krnl_prng_ls123_ushort, &status); - checkError(status, "Failed to create kernel prng_ls123_ushort"); -#endif + // Create the kernel - name passed in here must match kernel name in the + // original CL file, that was compiled into an AOCX file using the AOC tool + #ifdef ENABLE_KRNL_GA + command_queue_ga = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue ga"); + kernel_ga = clCreateKernel(program, name_krnl_ga, &status); + checkError(status, "Failed to create kernel ga"); + #endif -#ifdef ENABLE_KRNL_PRNG_LS_FLOAT - command_queue_prng_ls_float = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue prng_ls_float"); - kernel_prng_ls_float = clCreateKernel(program, name_krnl_prng_ls_float, &status); - checkError(status, "Failed to create kernel prng_ls_float"); -#endif + #ifdef ENABLE_KRNL_CONFORM + command_queue_conform = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue conform"); + kernel_conform = clCreateKernel(program, name_krnl_conform, &status); + checkError(status, "Failed to create kernel conform"); + #endif -#ifdef ENABLE_KRNL_LS - command_queue_ls = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue_ls"); - kernel_ls = clCreateKernel(program, name_krnl_ls, &status); - checkError(status, "Failed to create kernel ls"); -#endif + #ifdef ENABLE_KRNL_INTERE + command_queue_intere = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue intere"); + kernel_intere = clCreateKernel(program, name_krnl_intere, &status); + checkError(status, "Failed to create kernel intere"); + #endif -#ifdef ENABLE_KRNL_IGL_ARBITER - command_queue_igl_arbiter = clCreateCommandQueue(context, device, 0, &status); - checkError(status, "Failed to create command queue igl_arbiter"); - kernel_igl_arbiter = clCreateKernel(program, name_krnl_igl_arbiter, &status); - checkError(status, "Failed to create kernel igl_arbiter"); -#endif + #ifdef ENABLE_KRNL_INTRAE + command_queue_intrae = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue intrae"); + kernel_intrae = clCreateKernel(program, name_krnl_intrae, &status); + checkError(status, "Failed to create kernel"); + #endif + + #ifdef ENABLE_KRNL_PRNG_BT_USHORT_FLOAT + command_queue_prng_bt_ushort_float = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue prng_bt_ushort_float"); + kernel_prng_bt_ushort_float = clCreateKernel(program, name_krnl_prng_bt_ushort_float, &status); + checkError(status, "Failed to create kernel prng_bt_ushort_float"); + #endif + + #ifdef ENABLE_KRNL_PRNG_GG_UCHAR + command_queue_prng_gg_uchar = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue prng_gg_uchar"); + kernel_prng_gg_uchar = clCreateKernel(program, name_krnl_prng_gg_uchar, &status); + checkError(status, "Failed to create kernel prng_gg_uchar"); + #endif - return true; + #ifdef ENABLE_KRNL_PRNG_GG_FLOAT + command_queue_prng_gg_float = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue prng_gg_float"); + kernel_prng_gg_float = clCreateKernel(program, name_krnl_prng_gg_float, &status); + checkError(status, "Failed to create kernel prng_gg_float"); + #endif + + #ifdef ENABLE_KRNL_PRNG_LS123_USHORT + command_queue_prng_ls123_ushort = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue prng_ls123_ushort"); + kernel_prng_ls123_ushort = clCreateKernel(program, name_krnl_prng_ls123_ushort, &status); + checkError(status, "Failed to create kernel prng_ls123_ushort"); + #endif + + #ifdef ENABLE_KRNL_PRNG_LS_FLOAT + command_queue_prng_ls_float = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue prng_ls_float"); + kernel_prng_ls_float = clCreateKernel(program, name_krnl_prng_ls_float, &status); + checkError(status, "Failed to create kernel prng_ls_float"); + #endif + + #ifdef ENABLE_KRNL_LS + command_queue_ls = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue_ls"); + kernel_ls = clCreateKernel(program, name_krnl_ls, &status); + checkError(status, "Failed to create kernel ls"); + #endif + + #ifdef ENABLE_KRNL_IGL_ARBITER + command_queue_igl_arbiter = clCreateCommandQueue(context, device, 0, &status); + checkError(status, "Failed to create command queue igl_arbiter"); + kernel_igl_arbiter = clCreateKernel(program, name_krnl_igl_arbiter, &status); + checkError(status, "Failed to create kernel igl_arbiter"); + #endif + + return true; } // Free the resources allocated during initialization void cleanup() { -#ifdef ENABLE_KRNL_GA - if(kernel_ga) {clReleaseKernel(kernel_ga);} - if(command_queue_ga) {clReleaseCommandQueue(command_queue_ga);} -#endif + #ifdef ENABLE_KRNL_GA + if(kernel_ga) {clReleaseKernel(kernel_ga);} + if(command_queue_ga) {clReleaseCommandQueue(command_queue_ga);} + #endif -#ifdef ENABLE_KRNL_CONFORM - if(kernel_conform) {clReleaseKernel(kernel_conform);} - if(command_queue_conform) {clReleaseCommandQueue(command_queue_conform);} -#endif + #ifdef ENABLE_KRNL_CONFORM + if(kernel_conform) {clReleaseKernel(kernel_conform);} + if(command_queue_conform) {clReleaseCommandQueue(command_queue_conform);} + #endif -#ifdef ENABLE_KRNL_INTERE - if(kernel_intere) {clReleaseKernel(kernel_intere);} - if(command_queue_intere) {clReleaseCommandQueue(command_queue_intere);} -#endif + #ifdef ENABLE_KRNL_INTERE + if(kernel_intere) {clReleaseKernel(kernel_intere);} + if(command_queue_intere) {clReleaseCommandQueue(command_queue_intere);} + #endif -#ifdef ENABLE_KRNL_INTRAE - if(kernel_intrae) {clReleaseKernel(kernel_intrae);} - if(command_queue_intrae) {clReleaseCommandQueue(command_queue_intrae);} -#endif + #ifdef ENABLE_KRNL_INTRAE + if(kernel_intrae) {clReleaseKernel(kernel_intrae);} + if(command_queue_intrae) {clReleaseCommandQueue(command_queue_intrae);} + #endif -#ifdef ENABLE_KRNL_PRNG_BT_USHORT_FLOAT - if(kernel_prng_bt_ushort_float) {clReleaseKernel(kernel_prng_bt_ushort_float);} - if(command_queue_prng_bt_ushort_float) {clReleaseCommandQueue(command_queue_prng_bt_ushort_float);} -#endif + #ifdef ENABLE_KRNL_PRNG_BT_USHORT_FLOAT + if(kernel_prng_bt_ushort_float) {clReleaseKernel(kernel_prng_bt_ushort_float);} + if(command_queue_prng_bt_ushort_float) {clReleaseCommandQueue(command_queue_prng_bt_ushort_float);} + #endif -#ifdef ENABLE_KRNL_PRNG_GG_UCHAR - if(kernel_prng_gg_uchar) {clReleaseKernel(kernel_prng_gg_uchar);} - if(command_queue_prng_gg_uchar) {clReleaseCommandQueue(command_queue_prng_gg_uchar);} -#endif + #ifdef ENABLE_KRNL_PRNG_GG_UCHAR + if(kernel_prng_gg_uchar) {clReleaseKernel(kernel_prng_gg_uchar);} + if(command_queue_prng_gg_uchar) {clReleaseCommandQueue(command_queue_prng_gg_uchar);} + #endif -#ifdef ENABLE_KRNL_PRNG_GG_FLOAT - if(kernel_prng_gg_float) {clReleaseKernel(kernel_prng_gg_float);} - if(command_queue_prng_gg_float) {clReleaseCommandQueue(command_queue_prng_gg_float);} -#endif + #ifdef ENABLE_KRNL_PRNG_GG_FLOAT + if(kernel_prng_gg_float) {clReleaseKernel(kernel_prng_gg_float);} + if(command_queue_prng_gg_float) {clReleaseCommandQueue(command_queue_prng_gg_float);} + #endif -#ifdef ENABLE_KRNL_PRNG_LS123_USHORT - if(kernel_prng_ls123_ushort) {clReleaseKernel(kernel_prng_ls123_ushort);} - if(command_queue_prng_ls123_ushort) {clReleaseCommandQueue(command_queue_prng_ls123_ushort);} -#endif + #ifdef ENABLE_KRNL_PRNG_LS123_USHORT + if(kernel_prng_ls123_ushort) {clReleaseKernel(kernel_prng_ls123_ushort);} + if(command_queue_prng_ls123_ushort) {clReleaseCommandQueue(command_queue_prng_ls123_ushort);} + #endif -#ifdef ENABLE_KRNL_PRNG_LS_FLOAT - if(kernel_prng_ls_float) {clReleaseKernel(kernel_prng_ls_float);} - if(command_queue_prng_ls_float) {clReleaseCommandQueue(command_queue_prng_ls_float);} -#endif + #ifdef ENABLE_KRNL_PRNG_LS_FLOAT + if(kernel_prng_ls_float) {clReleaseKernel(kernel_prng_ls_float);} + if(command_queue_prng_ls_float) {clReleaseCommandQueue(command_queue_prng_ls_float);} + #endif -#ifdef ENABLE_KRNL_LS - if(kernel_ls) {clReleaseKernel(kernel_ls);} - if(command_queue_ls) {clReleaseCommandQueue(command_queue_ls);} -#endif + #ifdef ENABLE_KRNL_LS + if(kernel_ls) {clReleaseKernel(kernel_ls);} + if(command_queue_ls) {clReleaseCommandQueue(command_queue_ls);} + #endif -#ifdef ENABLE_KRNL_IGL_ARBITER - if(kernel_igl_arbiter) {clReleaseKernel(kernel_igl_arbiter);} - if(command_queue_igl_arbiter) {clReleaseCommandQueue(command_queue_igl_arbiter);} -#endif + #ifdef ENABLE_KRNL_IGL_ARBITER + if(kernel_igl_arbiter) {clReleaseKernel(kernel_igl_arbiter);} + if(command_queue_igl_arbiter) {clReleaseCommandQueue(command_queue_igl_arbiter);} + #endif - if(program) {clReleaseProgram(program);} - if(context) {clReleaseContext(context);} + if(program) {clReleaseProgram(program);} + if(context) {clReleaseContext(context);} - if(cpu_init_populations) {alignedFree(cpu_init_populations);} - if(cpu_final_populations){alignedFree(cpu_final_populations);} - if(cpu_energies) {alignedFree(cpu_energies);} - if(cpu_result_ligands) {alignedFree(cpu_result_ligands);} - if(cpu_prng_seeds) {alignedFree(cpu_prng_seeds);} -#if defined(SINGLE_COPY_POP_ENE) - if(cpu_evals_of_runs) {alignedFree(cpu_evals_of_runs);} -#endif - if(cpu_ref_ori_angles) {alignedFree(cpu_ref_ori_angles);} + if(cpu_init_populations) {alignedFree(cpu_init_populations);} + if(cpu_final_populations){alignedFree(cpu_final_populations);} + if(cpu_energies) {alignedFree(cpu_energies);} + if(cpu_result_ligands) {alignedFree(cpu_result_ligands);} + if(cpu_prng_seeds) {alignedFree(cpu_prng_seeds);} -//#if defined (FIXED_POINT_INTERE) -#if 0 - if(cpu_fixedpt64grids) {alignedFree(cpu_fixedpt64grids);} -#endif + #if defined(SINGLE_COPY_POP_ENE) + if(cpu_evals_of_runs) {alignedFree(cpu_evals_of_runs);} + #endif -#if defined (FIXED_POINT_INTERE) - if(mem_KerConstStatic_fixpt64_atom_charges_const) {clReleaseMemObject(mem_KerConstStatic_fixpt64_atom_charges_const);} -#endif - if(mem_KerConstStatic_atom_charges_const) {clReleaseMemObject(mem_KerConstStatic_atom_charges_const);} - if(mem_KerConstStatic_atom_types_const) {clReleaseMemObject(mem_KerConstStatic_atom_types_const);} - if(mem_KerConstStatic_intraE_contributors_const) {clReleaseMemObject(mem_KerConstStatic_intraE_contributors_const);} - - if(mem_KerConstStatic_reqm_const) {clReleaseMemObject(mem_KerConstStatic_reqm_const);} - if(mem_KerConstStatic_reqm_hbond_const) {clReleaseMemObject(mem_KerConstStatic_reqm_hbond_const);} - if(mem_KerConstStatic_atom1_types_reqm_const) {clReleaseMemObject(mem_KerConstStatic_atom1_types_reqm_const);} - if(mem_KerConstStatic_atom2_types_reqm_const) {clReleaseMemObject(mem_KerConstStatic_atom2_types_reqm_const);} - - if(mem_KerConstStatic_VWpars_AC_const) {clReleaseMemObject(mem_KerConstStatic_VWpars_AC_const);} - if(mem_KerConstStatic_VWpars_BD_const) {clReleaseMemObject(mem_KerConstStatic_VWpars_BD_const);} - if(mem_KerConstStatic_dspars_S_const) {clReleaseMemObject(mem_KerConstStatic_dspars_S_const);} - if(mem_KerConstStatic_dspars_V_const) {clReleaseMemObject(mem_KerConstStatic_dspars_V_const);} - if(mem_KerConstStatic_rotlist_const) {clReleaseMemObject(mem_KerConstStatic_rotlist_const);} - if(mem_KerConstStatic_ref_coords_const) {clReleaseMemObject(mem_KerConstStatic_ref_coords_const);} - if(mem_KerConstStatic_rotbonds_moving_vectors_const) {clReleaseMemObject(mem_KerConstStatic_rotbonds_moving_vectors_const);} - if(mem_KerConstStatic_rotbonds_unit_vectors_const) {clReleaseMemObject(mem_KerConstStatic_rotbonds_unit_vectors_const);} - if(mem_KerConstStatic_ref_orientation_quats_const) {clReleaseMemObject(mem_KerConstStatic_ref_orientation_quats_const);} - - if(mem_dockpars_fgrids) {clReleaseMemObject(mem_dockpars_fgrids);} -#if defined(SEPARATE_FGRID_INTERE) - if(mem_dockpars_fgrids2) {clReleaseMemObject(mem_dockpars_fgrids2);} - if(mem_dockpars_fgrids3) {clReleaseMemObject(mem_dockpars_fgrids3);} -#endif + if(cpu_ref_ori_angles) {alignedFree(cpu_ref_ori_angles);} - if(mem_dockpars_conformations_current) {clReleaseMemObject(mem_dockpars_conformations_current);} - if(mem_dockpars_energies_current) {clReleaseMemObject(mem_dockpars_energies_current);} + #if defined (FIXED_POINT_INTERE) + if(mem_KerConstStatic_fixpt64_atom_charges_const) {clReleaseMemObject(mem_KerConstStatic_fixpt64_atom_charges_const);} + #endif -/* - if(mem_dockpars_prng_states) {clReleaseMemObject(mem_dockpars_prng_states);} -*/ -#if defined(SINGLE_COPY_POP_ENE) - if(mem_evals_performed) {clReleaseMemObject(mem_evals_performed);} - if(mem_gens_performed) {clReleaseMemObject(mem_gens_performed);} -#else - if(mem_evals_and_generations_performed) {clReleaseMemObject(mem_evals_and_generations_performed);} -#endif + if(mem_KerConstStatic_atom_charges_const) {clReleaseMemObject(mem_KerConstStatic_atom_charges_const);} + if(mem_KerConstStatic_atom_types_const) {clReleaseMemObject(mem_KerConstStatic_atom_types_const);} + if(mem_KerConstStatic_intraE_contributors_const) {clReleaseMemObject(mem_KerConstStatic_intraE_contributors_const);} + + if(mem_KerConstStatic_reqm_const) {clReleaseMemObject(mem_KerConstStatic_reqm_const);} + if(mem_KerConstStatic_reqm_hbond_const) {clReleaseMemObject(mem_KerConstStatic_reqm_hbond_const);} + if(mem_KerConstStatic_atom1_types_reqm_const) {clReleaseMemObject(mem_KerConstStatic_atom1_types_reqm_const);} + if(mem_KerConstStatic_atom2_types_reqm_const) {clReleaseMemObject(mem_KerConstStatic_atom2_types_reqm_const);} + + if(mem_KerConstStatic_VWpars_AC_const) {clReleaseMemObject(mem_KerConstStatic_VWpars_AC_const);} + if(mem_KerConstStatic_VWpars_BD_const) {clReleaseMemObject(mem_KerConstStatic_VWpars_BD_const);} + if(mem_KerConstStatic_dspars_S_const) {clReleaseMemObject(mem_KerConstStatic_dspars_S_const);} + if(mem_KerConstStatic_dspars_V_const) {clReleaseMemObject(mem_KerConstStatic_dspars_V_const);} + if(mem_KerConstStatic_rotlist_const) {clReleaseMemObject(mem_KerConstStatic_rotlist_const);} + if(mem_KerConstStatic_ref_coords_const) {clReleaseMemObject(mem_KerConstStatic_ref_coords_const);} + if(mem_KerConstStatic_rotbonds_moving_vectors_const) {clReleaseMemObject(mem_KerConstStatic_rotbonds_moving_vectors_const);} + if(mem_KerConstStatic_rotbonds_unit_vectors_const) {clReleaseMemObject(mem_KerConstStatic_rotbonds_unit_vectors_const);} + if(mem_KerConstStatic_ref_orientation_quats_const) {clReleaseMemObject(mem_KerConstStatic_ref_orientation_quats_const);} + + if(mem_dockpars_fgrids) {clReleaseMemObject(mem_dockpars_fgrids);} + + #if defined(SEPARATE_FGRID_INTERE) + if(mem_dockpars_fgrids2) {clReleaseMemObject(mem_dockpars_fgrids2);} + if(mem_dockpars_fgrids3) {clReleaseMemObject(mem_dockpars_fgrids3);} + #endif + + if(mem_dockpars_conformations_current) {clReleaseMemObject(mem_dockpars_conformations_current);} + if(mem_dockpars_energies_current) {clReleaseMemObject(mem_dockpars_energies_current);} + + #if defined(SINGLE_COPY_POP_ENE) + if(mem_evals_performed) {clReleaseMemObject(mem_evals_performed);} + if(mem_gens_performed) {clReleaseMemObject(mem_gens_performed);} + #else + if(mem_evals_and_generations_performed) {clReleaseMemObject(mem_evals_and_generations_performed);} + #endif } // Helper functions to display parameters returned by OpenCL queries @@ -1518,16 +1260,19 @@ static void device_info_ulong( cl_device_id device, cl_device_info param, const clGetDeviceInfo(device, param, sizeof(cl_ulong), &a, NULL); printf("%-40s = %lu\n", name, a); } + static void device_info_uint( cl_device_id device, cl_device_info param, const char* name) { cl_uint a; clGetDeviceInfo(device, param, sizeof(cl_uint), &a, NULL); printf("%-40s = %u\n", name, a); } + static void device_info_bool( cl_device_id device, cl_device_info param, const char* name) { cl_bool a; clGetDeviceInfo(device, param, sizeof(cl_bool), &a, NULL); printf("%-40s = %s\n", name, (a?"true":"false")); } + static void device_info_string( cl_device_id device, cl_device_info param, const char* name) { char a[STRING_BUFFER_LEN]; clGetDeviceInfo(device, param, STRING_BUFFER_LEN, &a, NULL); @@ -1537,39 +1282,39 @@ static void device_info_string( cl_device_id device, cl_device_info param, const // Query and display OpenCL information on device and runtime environment static void display_device_info( cl_device_id device ) { - printf("Querying device for info:\n"); - printf("========================\n"); - device_info_string(device, CL_DEVICE_NAME, "CL_DEVICE_NAME"); - device_info_string(device, CL_DEVICE_VENDOR, "CL_DEVICE_VENDOR"); - device_info_uint(device, CL_DEVICE_VENDOR_ID, "CL_DEVICE_VENDOR_ID"); - device_info_string(device, CL_DEVICE_VERSION, "CL_DEVICE_VERSION"); - device_info_string(device, CL_DRIVER_VERSION, "CL_DRIVER_VERSION"); - device_info_uint(device, CL_DEVICE_ADDRESS_BITS, "CL_DEVICE_ADDRESS_BITS"); - device_info_bool(device, CL_DEVICE_AVAILABLE, "CL_DEVICE_AVAILABLE"); - device_info_bool(device, CL_DEVICE_ENDIAN_LITTLE, "CL_DEVICE_ENDIAN_LITTLE"); - device_info_ulong(device, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, "CL_DEVICE_GLOBAL_MEM_CACHE_SIZE"); - device_info_ulong(device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE"); - device_info_ulong(device, CL_DEVICE_GLOBAL_MEM_SIZE, "CL_DEVICE_GLOBAL_MEM_SIZE"); - device_info_bool(device, CL_DEVICE_IMAGE_SUPPORT, "CL_DEVICE_IMAGE_SUPPORT"); - device_info_ulong(device, CL_DEVICE_LOCAL_MEM_SIZE, "CL_DEVICE_LOCAL_MEM_SIZE"); - device_info_ulong(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, "CL_DEVICE_MAX_CLOCK_FREQUENCY"); - device_info_ulong(device, CL_DEVICE_MAX_COMPUTE_UNITS, "CL_DEVICE_MAX_COMPUTE_UNITS"); - device_info_ulong(device, CL_DEVICE_MAX_CONSTANT_ARGS, "CL_DEVICE_MAX_CONSTANT_ARGS"); - device_info_ulong(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE"); - device_info_uint(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); - device_info_uint(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, "CL_DEVICE_MEM_BASE_ADDR_ALIGN"); - device_info_uint(device, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE"); - device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR"); - device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT"); - device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT"); - device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG"); - device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT"); - device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE"); - - { - cl_command_queue_properties ccp; - clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &ccp, NULL); - printf("%-40s = %s\n", "Command queue out of order? ", ((ccp & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)?"true":"false")); - printf("%-40s = %s\n", "Command queue profiling enabled? ", ((ccp & CL_QUEUE_PROFILING_ENABLE)?"true":"false")); - } + printf("Querying device for info:\n"); + printf("========================\n"); + device_info_string(device, CL_DEVICE_NAME, "CL_DEVICE_NAME"); + device_info_string(device, CL_DEVICE_VENDOR, "CL_DEVICE_VENDOR"); + device_info_uint(device, CL_DEVICE_VENDOR_ID, "CL_DEVICE_VENDOR_ID"); + device_info_string(device, CL_DEVICE_VERSION, "CL_DEVICE_VERSION"); + device_info_string(device, CL_DRIVER_VERSION, "CL_DRIVER_VERSION"); + device_info_uint(device, CL_DEVICE_ADDRESS_BITS, "CL_DEVICE_ADDRESS_BITS"); + device_info_bool(device, CL_DEVICE_AVAILABLE, "CL_DEVICE_AVAILABLE"); + device_info_bool(device, CL_DEVICE_ENDIAN_LITTLE, "CL_DEVICE_ENDIAN_LITTLE"); + device_info_ulong(device, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, "CL_DEVICE_GLOBAL_MEM_CACHE_SIZE"); + device_info_ulong(device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE"); + device_info_ulong(device, CL_DEVICE_GLOBAL_MEM_SIZE, "CL_DEVICE_GLOBAL_MEM_SIZE"); + device_info_bool(device, CL_DEVICE_IMAGE_SUPPORT, "CL_DEVICE_IMAGE_SUPPORT"); + device_info_ulong(device, CL_DEVICE_LOCAL_MEM_SIZE, "CL_DEVICE_LOCAL_MEM_SIZE"); + device_info_ulong(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, "CL_DEVICE_MAX_CLOCK_FREQUENCY"); + device_info_ulong(device, CL_DEVICE_MAX_COMPUTE_UNITS, "CL_DEVICE_MAX_COMPUTE_UNITS"); + device_info_ulong(device, CL_DEVICE_MAX_CONSTANT_ARGS, "CL_DEVICE_MAX_CONSTANT_ARGS"); + device_info_ulong(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE"); + device_info_uint(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); + device_info_uint(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, "CL_DEVICE_MEM_BASE_ADDR_ALIGN"); + device_info_uint(device, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE"); + device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR"); + device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT"); + device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT"); + device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG"); + device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT"); + device_info_uint(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, "CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE"); + + { + cl_command_queue_properties ccp; + clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &ccp, NULL); + printf("%-40s = %s\n", "Command queue out of order? ", ((ccp & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)?"true":"false")); + printf("%-40s = %s\n", "Command queue profiling enabled? ", ((ccp & CL_QUEUE_PROFILING_ENABLE)?"true":"false")); + } } -- GitLab