Commit 740a515a authored by Leonardo Solis's avatar Leonardo Solis
Browse files

#31, partly ported opencl setup: several changes

compiles but errors are detected in swemu


Former-commit-id: c15a9736
parent fdda9657
......@@ -113,7 +113,7 @@ CXXFLAGS:=-Wall -O0 -g -std=c++14 $(OTHER_FLAGS)
# Host Application
hostbin_SRCS=$(wildcard ../common/src/AOCLUtils/*.cpp wrapcl/src/*.cpp host/src/*.cpp) \
hostbin_SRCS=$(wildcard ./host/src/*.cpp) \
$(oclHelper_SRCS) \
$(xcl2_SRCS)
......@@ -391,8 +391,8 @@ kerrpt-sw:
# Compile for sw and hw emulation
kerswemu:
make all TARGETS=sw_emu DEVICES=$(BOARD)
cp ./xclbin/$(XCLBINS).sw_emu.*.xclbin .
mv ./$(XCLBINS).sw_emu.*.xclbin ./$(XCLBINS).xclbin
#cp ./xclbin/$(XCLBINS).sw_emu.*.xclbin .
#mv ./$(XCLBINS).sw_emu.*.xclbin ./$(XCLBINS).xclbin
@echo $(newline)
@echo "==============================================================================="
@echo "INFO: sw emulation file created under: \"./xclbin/$(XCLBINS).sw_emu.*.xclbin\" "
......
......@@ -26,6 +26,10 @@
//#include "ImportSource.h"
#include "BufferObjects.h"
#include "xcl2.hpp"
#include <vector>
using std::vector;
// Elapsed time in seconds between two clock() readings, as a float.
// The subtraction is done in clock_t first and only the difference is
// converted to float; converting each reading separately would round both
// to float precision and make short intervals collapse to zero once the
// readings grow large.
#define ELAPSEDSECS(stop,start) ((float) ((stop) - (start)) / (float) CLOCKS_PER_SEC)
int docking_with_gpu(const Gridinfo* mygrid,
......
......@@ -16,6 +16,10 @@
#include "miscellaneous.h"
// Added for allocating fgrids
#include <vector>
using std::vector;
// Accessors for a 4D grid stored as one flat array starting at mempoi.
// The element offset is x + size[0]*(y + size[1]*(z + size[2]*t)), i.e. x
// has stride 1. getvalue_4Darr reads the extents from (grinf).size_xyz,
// getvalue_4Darr_withsize from the array gridsize_xyz directly.
// Both expand to an lvalue, so they can be read from or assigned to.
// Every argument and the whole expansion are parenthesized so that
// expression arguments (e.g. t = a + b) index correctly. The grinf /
// gridsize_xyz argument appears three times in the expansion, so do not
// pass an expression with side effects for it.
#define getvalue_4Darr(mempoi, grinf, t, z, y, x) (*((mempoi) + (grinf).size_xyz[0] * ((y) + (grinf).size_xyz[1] * ((z) + (grinf).size_xyz[2]*(t))) + (x)))
#define getvalue_4Darr_withsize(mempoi, gridsize_xyz, t, z, y, x) (*((mempoi) + (gridsize_xyz)[0]*((y) + (gridsize_xyz)[1] * ((z) + (gridsize_xyz)[2]*(t))) + (x)))
//The macros help to access the grid point values
......@@ -41,6 +45,6 @@ typedef struct
int get_gridinfo(const char*, Gridinfo*);
int get_gridvalues_f(const Gridinfo* mygrid,
float** fgrids);
float* fgrids);
#endif /* PROCESSGRID_H_ */
......@@ -26,8 +26,6 @@ int main(int argc, char* argv[])
Liganddata myligand_init;
Dockpars mypars;
float* floatgrids;
clock_t clock_start_program, clock_stop_program;
......@@ -57,6 +55,11 @@ int main(int argc, char* argv[])
if (get_gridinfo(mypars.fldfile, &mygrid) != 0)
return 1;
//allocating CPU memory for floatgrids
size_t size_fgrid_nelems = (mygrid.num_of_atypes+2) * mygrid.size_xyz[0] * mygrid.size_xyz[1] * mygrid.size_xyz[2];
vector<float,aligned_allocator<float>> floatgrids(size_fgrid_nelems);
// Filling the atom types field of myligand according to the grid types
if (init_liganddata(mypars.ligandfile, &myligand_init, &mygrid) != 0)
return 1;
......@@ -66,7 +69,7 @@ int main(int argc, char* argv[])
return 1;
//Reading the grid files and storing values in the memory region pointed by floatgrids
if (get_gridvalues_f(&mygrid, &floatgrids) != 0)
if (get_gridvalues_f(&mygrid, floatgrids.data()) != 0)
return 1;
//------------------------------------------------------------
......@@ -85,7 +88,7 @@ int main(int argc, char* argv[])
print_ref_lig_energies_f(myligand_init,
mypars.smooth,
mygrid,
floatgrids,
floatgrids.data(),
mypars.coeffs.scaled_AD4_coeff_elec,
mypars.coeffs.AD4_coeff_desolv,
mypars.qasp);
......@@ -94,11 +97,9 @@ int main(int argc, char* argv[])
//------------------------------------------------------------
// Starting Docking
//------------------------------------------------------------
if (docking_with_gpu(&mygrid, floatgrids, &mypars, &myligand_init, &argc, argv, clock_start_program) != 0)
if (docking_with_gpu(&mygrid, floatgrids.data(), &mypars, &myligand_init, &argc, argv, clock_start_program) != 0)
return 1;
if(floatgrids) {free(floatgrids);}
/*
clock_stop_program = clock();
printf("Program run time: %.3f sec\n", ELAPSEDSECS(clock_stop_program, clock_start_program));
......
......@@ -16,57 +16,11 @@
#include <cstring>
#include "CL/opencl.h"
/*
#include "AOCLUtils/aocl_utils.h"
using namespace aocl_utils;
*/
#define STRING_BUFFER_LEN 1024
#include "xcl2.hpp"
#include <vector>
using std::vector;
// Function prototypes
void cleanup();
static void device_info_ulong( cl_device_id device, cl_device_info param, const char* name);
static void device_info_uint ( cl_device_id device, cl_device_info param, const char* name);
static void device_info_bool ( cl_device_id device, cl_device_info param, const char* name);
static void device_info_string( cl_device_id device, cl_device_info param, const char* name);
static void display_device_info( cl_device_id device );
//// --------------------------------
//// Host constant struct
//// --------------------------------
Dockparameters dockpars;
kernelconstant_static KerConstStatic;
//// --------------------------------
//// Device memory buffers
//// --------------------------------
cl_mem mem_KerConstStatic_InterE_atom_charges_const;
cl_mem mem_KerConstStatic_InterE_atom_types_const;
cl_mem mem_KerConstStatic_IntraE_atom_charges_const;
cl_mem mem_KerConstStatic_IntraE_atom_types_const;
cl_mem mem_KerConstStatic_intraE_contributors_const;
cl_mem mem_KerConstStatic_reqm_const;
cl_mem mem_KerConstStatic_reqm_hbond_const;
cl_mem mem_KerConstStatic_atom1_types_reqm_const;
cl_mem mem_KerConstStatic_atom2_types_reqm_const;
cl_mem mem_KerConstStatic_VWpars_AC_const;
cl_mem mem_KerConstStatic_VWpars_BD_const;
cl_mem mem_KerConstStatic_dspars_S_const;
cl_mem mem_KerConstStatic_dspars_V_const;
cl_mem mem_KerConstStatic_rotlist_const;
cl_mem mem_KerConstStatic_ref_coords_const;
cl_mem mem_KerConstStatic_rotbonds_moving_vectors_const;
cl_mem mem_KerConstStatic_rotbonds_unit_vectors_const;
cl_mem mem_KerConstStatic_ref_orientation_quats_const;
/* // Nr elements // Nr bytes
cl_mem mem_atom_charges_const; // float [MAX_NUM_OF_ATOMS]; // 90 = 90 //360
......@@ -85,22 +39,6 @@ cl_mem mem_rotbonds_unit_vectors_const; // float [3*MAX_NUM_OF_ROTBONDS]; // 3*
cl_mem mem_ref_orientation_quats_const; // float [4*MAX_NUM_OF_RUNS]; // 4*100 = 400 //1600
*/
cl_mem mem_dockpars_fgrids;
cl_mem mem_dockpars_conformations_current;
cl_mem mem_dockpars_energies_current;
cl_mem mem_evals_performed;
cl_mem mem_gens_performed;
#if !defined(SW_EMU)
// IMPORTANT: enable this dummy global argument only for "hw" build.
// Check ../common_xilinx/utility/boards.mk
// https://forums.xilinx.com/t5/SDAccel/ERROR-KernelCheck-83-114-in-sdx-2017-4/td-p/818135
cl_mem mem_dummy;
#endif
//// --------------------------------
//// Docking
//// --------------------------------
......@@ -321,7 +259,8 @@ filled with clock() */
Liganddata myligand_reference;
//allocating GPU memory for floatgrids,
size_t size_floatgrids_nbytes = (sizeof(float)) * (mygrid->num_of_atypes+2) * (mygrid->size_xyz[0]) * (mygrid->size_xyz[1]) * (mygrid->size_xyz[2]);
size_t size_floatgrids_nbytes = sizeof(float) * (mygrid->num_of_atypes+2) *
(mygrid->size_xyz[0]) * (mygrid->size_xyz[1]) * (mygrid->size_xyz[2]);
size_t size_populations_nbytes = mypars->num_of_runs * mypars->pop_size * ACTUAL_GENOTYPE_LENGTH * sizeof(float);
size_t size_populations_nelems = mypars->num_of_runs * mypars->pop_size * ACTUAL_GENOTYPE_LENGTH;
......@@ -347,7 +286,7 @@ filled with clock() */
//generating initial populations and random orientation angles of reference ligand
//(ligand will be moved to origo and scaled as well)
myligand_reference = *myligand_init;
gen_initpop_and_reflig(mypars, cpu_init_populations, cpu_ref_ori_angles, &myligand_reference, mygrid);
gen_initpop_and_reflig(mypars, cpu_init_populations.data(), cpu_ref_ori_angles.data(), &myligand_reference, mygrid);
//allocating memory in CPU for pseudorandom number generator seeds
const unsigned int num_of_prng_blocks = 25;
......@@ -371,17 +310,19 @@ filled with clock() */
// allocating memory in CPU for generation counters
vector<int,aligned_allocator<int>> cpu_gens_of_runs (size_evals_of_runs_nelems, 0);
//preparing the constant data fields for the GPU
// ----------------------------------------------------------------------
//preparing the constant data fields for the FPGA
// -----------------------------------------------------------------------------------------------------
// The original function does CUDA calls initializing const Kernel data.
// We create a struct to hold those constants
// and return them <here> (<here> = where prepare_const_fields_for_gpu() is called),
// so we can send them to Kernels from <here>, instead of from calcenergy.cpp as originally.
// ----------------------------------------------------------------------
if (prepare_conststatic_fields_for_gpu(&myligand_reference, mypars, cpu_ref_ori_angles, &KerConstStatic) == 1)
// -----------------------------------------------------------------------------------------------------
kernelconstant_static KerConstStatic;
if (prepare_conststatic_fields_for_gpu(&myligand_reference, mypars, cpu_ref_ori_angles.data(), &KerConstStatic) == 1)
return 1;
//preparing parameter struct
Dockparameters dockpars;
dockpars.num_of_atoms = ((unsigned char) myligand_reference.num_of_atoms);
dockpars.num_of_atypes = ((unsigned char) myligand_reference.num_of_atypes);
dockpars.num_of_intraE_contributors = ((unsigned int) myligand_reference.num_of_intraE_contributors);
......@@ -395,10 +336,8 @@ filled with clock() */
dockpars.rotbondlist_length = ((unsigned int) NUM_OF_THREADS_PER_BLOCK*(myligand_reference.num_of_rotcyc));
dockpars.coeff_elec = ((float) mypars->coeffs.scaled_AD4_coeff_elec);
dockpars.coeff_desolv = ((float) mypars->coeffs.AD4_coeff_desolv);
dockpars.num_of_energy_evals = (unsigned int) mypars->num_of_energy_evals;
dockpars.num_of_generations = (unsigned int) mypars->num_of_generations;
dockpars.pop_size = (unsigned int) mypars->pop_size;
dockpars.num_of_genes = (unsigned int)(myligand_reference.num_of_rotbonds + 6);
dockpars.tournament_rate = (mypars->tournament_rate)/100;
......@@ -427,82 +366,143 @@ filled with clock() */
const unsigned int mul_tmp3 = (dockpars.num_of_atypes + 1) * dockpars.g3;
// -----------------------------------------------------------------------------------------------------
// Hardware specific
// Specifying exact memory bank from host code
// Only valid if 4 banks are available (AWS)
cl_mem_ext_ptr_t d_bank0_ext; // Krnl_GA
cl_mem_ext_ptr_t d_bank1_ext; // Krnl_Conform
cl_mem_ext_ptr_t d_bank2_ext; // Krnl_InterE
cl_mem_ext_ptr_t d_bank3_ext; // Krnl_IntraE
d_bank0_ext.flags = XCL_MEM_DDR_BANK0;
d_bank0_ext.obj = NULL;
d_bank0_ext.param = 0;
d_bank1_ext.flags = XCL_MEM_DDR_BANK1;
d_bank1_ext.obj = NULL;
d_bank1_ext.param = 0;
d_bank2_ext.flags = XCL_MEM_DDR_BANK2;
d_bank2_ext.obj = NULL;
d_bank2_ext.param = 0;
d_bank3_ext.flags = XCL_MEM_DDR_BANK3;
d_bank3_ext.obj = NULL;
d_bank3_ext.param = 0;
// Replacing common buffer creation with
// a Xilinx-specific where DDR banks can be specified
mallocBufferObject(context,CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX,size_populations, &d_bank0_ext, &mem_dockpars_conformations_current); // GA
mallocBufferObject(context,CL_MEM_READ_WRITE | CL_MEM_EXT_PTR_XILINX,size_energies, &d_bank0_ext, &mem_dockpars_energies_current); // GA
mallocBufferObject(context,CL_MEM_WRITE_ONLY | CL_MEM_EXT_PTR_XILINX,size_evals_of_runs, &d_bank0_ext, &mem_evals_performed); // GA
mallocBufferObject(context,CL_MEM_WRITE_ONLY | CL_MEM_EXT_PTR_XILINX,size_evals_of_runs, &d_bank0_ext, &mem_gens_performed); // GA
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ROTATIONS*sizeof(int), &d_bank0_ext/*&d_bank1_ext*/, &mem_KerConstStatic_rotlist_const); // Conform
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ATOMS*sizeof(cl_float3), &d_bank0_ext/*&d_bank1_ext*/, &mem_KerConstStatic_ref_coords_const); // Conform
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ROTBONDS*sizeof(cl_float3), &d_bank0_ext/*&d_bank1_ext*/, &mem_KerConstStatic_rotbonds_moving_vectors_const); // Conform
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ROTBONDS*sizeof(cl_float3), &d_bank0_ext/*&d_bank1_ext*/, &mem_KerConstStatic_rotbonds_unit_vectors_const); // Conform
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_RUNS*sizeof(cl_float4), &d_bank0_ext/*&d_bank1_ext*/, &mem_KerConstStatic_ref_orientation_quats_const); // Conform
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ATOMS*sizeof(float), &d_bank1_ext/*&d_bank2_ext*/, &mem_KerConstStatic_InterE_atom_charges_const); // InterE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ATOMS*sizeof(char), &d_bank1_ext/*&d_bank2_ext*/, &mem_KerConstStatic_InterE_atom_types_const); // InterE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX,size_floatgrids, &d_bank1_ext/*&d_bank2_ext*/, &mem_dockpars_fgrids); // InterE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ATOMS*sizeof(float), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_IntraE_atom_charges_const); // IntraE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ATOMS*sizeof(char), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_IntraE_atom_types_const); // IntraE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_INTRAE_CONTRIBUTORS*sizeof(cl_char3), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_intraE_contributors_const); // IntraE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, ATYPE_NUM*sizeof(float), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_reqm_const); // IntraE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, ATYPE_NUM*sizeof(float), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_reqm_hbond_const); // IntraE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, ATYPE_NUM*sizeof(unsigned int), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_atom1_types_reqm_const); // IntraE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, ATYPE_NUM*sizeof(unsigned int), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_atom2_types_reqm_const); // IntraE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES*sizeof(float), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_VWpars_AC_const); // IntraE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES*sizeof(float), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_VWpars_BD_const); // IntraE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ATYPES*sizeof(float), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_dspars_S_const); // IntraE
mallocBufferObject(context,CL_MEM_READ_ONLY | CL_MEM_EXT_PTR_XILINX, MAX_NUM_OF_ATYPES*sizeof(float), &d_bank1_ext/*&d_bank3_ext*/, &mem_KerConstStatic_dspars_V_const); // IntraE
#if 0
// Hardware specific configuration (valid if 4 DDR banks are available, e.g.: AWS)
// Specifying precisely which DDR memory bank is
// being pointed to by a kernel global memory pointer.
cl_mem_ext_ptr_t d_bank0_ext; // Ideally: DDR bank0 <-> Krnl_GA
cl_mem_ext_ptr_t d_bank1_ext; // Ideally: DDR bank1 <-> Krnl_Conform
cl_mem_ext_ptr_t d_bank2_ext; // Ideally: DDR bank2 <-> Krnl_InterE
cl_mem_ext_ptr_t d_bank3_ext; // Ideally: DDR bank3 <-> Krnl_IntraE
d_bank0_ext.flags = XCL_MEM_DDR_BANK0; d_bank0_ext.obj = NULL; d_bank0_ext.param = 0;
d_bank1_ext.flags = XCL_MEM_DDR_BANK1; d_bank1_ext.obj = NULL; d_bank1_ext.param = 0;
d_bank2_ext.flags = XCL_MEM_DDR_BANK2; d_bank2_ext.obj = NULL; d_bank2_ext.param = 0;
d_bank3_ext.flags = XCL_MEM_DDR_BANK3; d_bank3_ext.obj = NULL; d_bank3_ext.param = 0;
#endif
// -----------------------------------------------------------------------------------------------------
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_InterE_atom_charges_const, &KerConstStatic.atom_charges_const[0], MAX_NUM_OF_ATOMS*sizeof(float));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_InterE_atom_types_const, &KerConstStatic.atom_types_const[0], MAX_NUM_OF_ATOMS*sizeof(char));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_IntraE_atom_charges_const, &KerConstStatic.atom_charges_const[0], MAX_NUM_OF_ATOMS*sizeof(float));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_IntraE_atom_types_const, &KerConstStatic.atom_types_const[0], MAX_NUM_OF_ATOMS*sizeof(char));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_intraE_contributors_const, &KerConstStatic.intraE_contributors_const[0], MAX_INTRAE_CONTRIBUTORS*sizeof(cl_char3));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_reqm_const, &KerConstStatic.reqm_const, ATYPE_NUM*sizeof(float));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_reqm_hbond_const, &KerConstStatic.reqm_hbond_const, ATYPE_NUM*sizeof(float));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_atom1_types_reqm_const, &KerConstStatic.atom1_types_reqm_const, ATYPE_NUM*sizeof(unsigned int));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_atom2_types_reqm_const, &KerConstStatic.atom2_types_reqm_const, ATYPE_NUM*sizeof(unsigned int));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_VWpars_AC_const, &KerConstStatic.VWpars_AC_const[0], MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES*sizeof(float));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_VWpars_BD_const, &KerConstStatic.VWpars_BD_const[0], MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES*sizeof(float));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_dspars_S_const, &KerConstStatic.dspars_S_const[0], MAX_NUM_OF_ATYPES*sizeof(float));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_dspars_V_const, &KerConstStatic.dspars_V_const[0], MAX_NUM_OF_ATYPES*sizeof(float));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_rotlist_const, &KerConstStatic.rotlist_const[0], MAX_NUM_OF_ROTATIONS*sizeof(int));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_ref_coords_const, &KerConstStatic.ref_coords_const[0], MAX_NUM_OF_ATOMS*sizeof(cl_float3));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_rotbonds_moving_vectors_const, &KerConstStatic.rotbonds_moving_vectors_const[0], MAX_NUM_OF_ROTBONDS*sizeof(cl_float3));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_rotbonds_unit_vectors_const, &KerConstStatic.rotbonds_unit_vectors_const[0], MAX_NUM_OF_ROTBONDS*sizeof(cl_float3));
memcopyBufferObjectToDevice(command_queue_ga,mem_KerConstStatic_ref_orientation_quats_const, &KerConstStatic.ref_orientation_quats_const[0], MAX_NUM_OF_RUNS*sizeof(cl_float4));
memcopyBufferObjectToDevice(command_queue_ga,mem_dockpars_fgrids, cpu_floatgrids, size_floatgrids);
// These commands will allocate memory on the FPGA. The cl::Buffer objects can
// be used to reference the memory locations on the device. The cl::Buffer
// object cannot be referenced directly and must be passed to other OpenCL
// functions.
// Krnl_GA buffers
cl::Buffer mem_dockpars_conformations_current (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
size_populations_nbytes, cpu_init_populations.data());
cl::Buffer mem_dockpars_energies_current (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
size_energies_nbytes, cpu_energies.data());
cl::Buffer mem_evals_performed (context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
size_evals_of_runs_nbytes, cpu_evals_of_runs.data());
cl::Buffer mem_gens_performed (context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
size_evals_of_runs_nbytes, cpu_gens_of_runs.data());
// Krnl_Conform buffers
cl::Buffer mem_KerConstStatic_rotlist_const (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ROTATIONS*sizeof(int), &KerConstStatic.rotlist_const[0]);
cl::Buffer mem_KerConstStatic_ref_coords_const (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ATOMS*sizeof(cl_float3), &KerConstStatic.ref_coords_const[0]);
cl::Buffer mem_KerConstStatic_rotbonds_moving_vectors_const
(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ROTBONDS*sizeof(cl_float3), &KerConstStatic.rotbonds_moving_vectors_const[0]);
cl::Buffer mem_KerConstStatic_rotbonds_unit_vectors_const
(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ROTBONDS*sizeof(cl_float3), &KerConstStatic.rotbonds_unit_vectors_const[0]);
cl::Buffer mem_KerConstStatic_ref_orientation_quats_const
(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_RUNS*sizeof(cl_float4), &KerConstStatic.ref_orientation_quats_const[0]);
// Krnl_InterE buffers
cl::Buffer mem_dockpars_fgrids (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
size_floatgrids_nbytes, cpu_floatgrids);
cl::Buffer mem_KerConstStatic_InterE_atom_charges_const
(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ATOMS*sizeof(float), &KerConstStatic.atom_charges_const[0]);
cl::Buffer mem_KerConstStatic_InterE_atom_types_const
(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ATOMS*sizeof(char), &KerConstStatic.atom_types_const[0]);
// Krnl_IntraE buffers
cl::Buffer mem_KerConstStatic_IntraE_atom_charges_const
(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ATOMS*sizeof(float), &KerConstStatic.atom_charges_const[0]);
cl::Buffer mem_KerConstStatic_IntraE_atom_types_const
(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ATOMS*sizeof(char), &KerConstStatic.atom_types_const[0]);
cl::Buffer mem_KerConstStatic_intraE_contributors_const
(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_INTRAE_CONTRIBUTORS*sizeof(cl_char3), &KerConstStatic.intraE_contributors_const[0]);
cl::Buffer mem_KerConstStatic_reqm_const (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
ATYPE_NUM*sizeof(float), &KerConstStatic.reqm_const);
cl::Buffer mem_KerConstStatic_reqm_hbond_const (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
ATYPE_NUM*sizeof(float), &KerConstStatic.reqm_hbond_const);
cl::Buffer mem_KerConstStatic_atom1_types_reqm_const
(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
ATYPE_NUM*sizeof(unsigned int), &KerConstStatic.atom1_types_reqm_const);
cl::Buffer mem_KerConstStatic_atom2_types_reqm_const
(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
ATYPE_NUM*sizeof(unsigned int), &KerConstStatic.atom2_types_reqm_const);
cl::Buffer mem_KerConstStatic_VWpars_AC_const (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES*sizeof(float), &KerConstStatic.VWpars_AC_const[0]);
cl::Buffer mem_KerConstStatic_VWpars_BD_const (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES*sizeof(float), &KerConstStatic.VWpars_BD_const[0]);
cl::Buffer mem_KerConstStatic_dspars_S_const (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ATYPES*sizeof(float), &KerConstStatic.dspars_S_const[0]);
cl::Buffer mem_KerConstStatic_dspars_V_const (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
MAX_NUM_OF_ATYPES*sizeof(float), &KerConstStatic.dspars_V_const[0]);
#if !defined(SW_EMU)
// Host-side backing storage for the dummy buffer (one integer).
// It is handed to the buffer below with CL_MEM_USE_HOST_PTR.
vector<int,aligned_allocator<int>> cpu_dummy (1);
// IMPORTANT: enable this dummy global argument only for "hw" build.
// Check ../common_xilinx/utility/boards.mk
// https://forums.xilinx.com/t5/SDAccel/ERROR-KernelCheck-83-114-in-sdx-2017-4/td-p/818135
// Fix: the constructor call was missing its closing parenthesis, which broke
// every build that does not define SW_EMU.
cl::Buffer mem_dummy (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, sizeof(int), cpu_dummy.data());
#endif
// -----------------------------------------------------------------------------------------------------
//Separate Read/write Buffer vector is needed to migrate data between host/device
std::vector<cl::Memory> inBufVec, outBufVec;
// Krnl_GA
inBufVec.push_back(mem_dockpars_conformations_current); // RD & WR
inBufVec.push_back(mem_dockpars_energies_current); // RD & WR
// Krnl_Conform
inBufVec.push_back(mem_KerConstStatic_rotlist_const);
inBufVec.push_back(mem_KerConstStatic_ref_coords_const);
inBufVec.push_back(mem_KerConstStatic_rotbonds_moving_vectors_const);
inBufVec.push_back(mem_KerConstStatic_rotbonds_unit_vectors_const);
inBufVec.push_back(mem_KerConstStatic_ref_orientation_quats_const);
// Krnl_InterE
inBufVec.push_back(mem_dockpars_fgrids);
inBufVec.push_back(mem_KerConstStatic_InterE_atom_charges_const);
inBufVec.push_back(mem_KerConstStatic_InterE_atom_types_const);
// Krnl_IntraE
inBufVec.push_back(mem_KerConstStatic_IntraE_atom_charges_const);
inBufVec.push_back(mem_KerConstStatic_IntraE_atom_types_const);
inBufVec.push_back(mem_KerConstStatic_intraE_contributors_const);
inBufVec.push_back(mem_KerConstStatic_reqm_const);
inBufVec.push_back(mem_KerConstStatic_reqm_hbond_const);
inBufVec.push_back(mem_KerConstStatic_atom1_types_reqm_const);
inBufVec.push_back(mem_KerConstStatic_atom2_types_reqm_const);
inBufVec.push_back(mem_KerConstStatic_VWpars_AC_const);
inBufVec.push_back(mem_KerConstStatic_VWpars_BD_const);
inBufVec.push_back(mem_KerConstStatic_dspars_S_const);
inBufVec.push_back(mem_KerConstStatic_dspars_V_const);
// Krnl_GA
outBufVec.push_back(mem_dockpars_conformations_current); // RD & WR
outBufVec.push_back(mem_dockpars_energies_current); // RD & WR
outBufVec.push_back(mem_evals_performed);
outBufVec.push_back(mem_gens_performed);
// -----------------------------------------------------------------------------------------------------
// These commands will load the CPU-side source vectors from the host
// application into the cl::Buffer objects.
// The data will be transferred from system memory
// over PCIe to the FPGA on-board DDR memory.
command_queue_ga.enqueueMigrateMemObjects(inBufVec,0/* 0 means from host*/);
// -----------------------------------------------------------------------------------------------------
clock_start_docking = clock();
......@@ -525,6 +525,7 @@ filled with clock() */
kernel_ga.setArg(narg++, dockpars.crossover_rate);
kernel_ga.setArg(narg++, dockpars.num_of_lsentities);
kernel_ga.setArg(narg++, dockpars.num_of_genes);
// Other kernel args are configured at every docking run
#endif
#ifdef ENABLE_KRNL_CONFORM
......@@ -537,10 +538,7 @@ filled with clock() */
kernel_conform.setArg(narg++, dockpars.num_of_atoms);
kernel_conform.setArg(narg++, dockpars.num_of_genes);
kernel_conform.setArg(narg++, mem_KerConstStatic_ref_orientation_quats_const);
kernel_conform.setArg(narg++, KerConstDynamic.ref_orientation_quats_const[0]);
kernel_conform.setArg(narg++, KerConstDynamic.ref_orientation_quats_const[1]);
kernel_conform.setArg(narg++, KerConstDynamic.ref_orientation_quats_const[2]);
kernel_conform.setArg(narg++, KerConstDynamic.ref_orientation_quats_const[3]);
// Other kernel args are configured at every docking run
#endif
unsigned char gridsizex_minus1 = dockpars.gridsize_x - 1;
......@@ -590,312 +588,306 @@ filled with clock() */
#endif
#ifdef ENABLE_KRNL_PRNG_BT_USHORT_FLOAT
setKernelArg(kernel_prng_bt_ushort_float,2, sizeof(unsigned int), &dockpars.pop_size);
// Other kernel args are configured at every docking run
kernel_prng_bt_ushort_float.setArg(2, dockpars.pop_size);
#if !defined(SW_EMU)
setKernelArg(kernel_prng_bt_ushort_float,3, sizeof(mem_dummy), &mem_dummy);
kernel_prng_bt_ushort_float.setArg(3, mem_dummy);
#endif
#endif
#endif // End of ENABLE_KRNL_PRNG_BT_USHORT_FLOAT
#ifdef ENABLE_KRNL_PRNG_GG_UCHAR
setKernelArg(kernel_prng_gg_uchar,1, sizeof(unsigned char), &dockpars.num_of_genes);
// Other kernel args are configured at every docking run
kernel_prng_gg_uchar.setArg(1, dockpars.num_of_genes);
#if !defined(SW_EMU)
setKernelArg(kernel_prng_gg_uchar,2, sizeof(mem_dummy), &mem_dummy);
kernel_prng_gg_uchar.setArg(2, mem_dummy);
#endif
#endif
#endif // End of ENABLE_KRNL_PRNG_GG_UCHAR
#ifdef ENABLE_KRNL_PRNG_GG_FLOAT
setKernelArg(kernel_prng_gg_float,1, sizeof(unsigned char), &dockpars.num_of_genes);
// Other kernel args are configured at every docking run
kernel_prng_gg_float.setArg(1, dockpars.num_of_genes);
#if !defined(SW_EMU)
setKernelArg(kernel_prng_gg_float,2, sizeof(mem_dummy), &mem_dummy);
kernel_prng_gg_float.setArg(2, mem_dummy);
#endif
#endif
#endif // End of ENABLE_KRNL_PRNG_GG_FLOAT
#ifdef ENABLE_KRNL_PRNG_LS123_USHORT
setKernelArg(kernel_prng_ls123_ushort,9, sizeof(unsigned int), &dockpars.pop_size);
// Other kernel args are configured at every docking run
kernel_prng_ls123_ushort.setArg(9, dockpars.pop_size);
#if !defined(SW_EMU)
setKernelArg(kernel_prng_ls123_ushort,10, sizeof(mem_dummy), &mem_dummy);
kernel_prng_ls123_ushort.setArg(10, mem_dummy);
#endif
#endif
#endif // End of ENABLE_KRNL_PRNG_LS123_USHORT
#ifdef ENABLE_KRNL_PRNG_LS_FLOAT
setKernelArg(kernel_prng_ls_float,1, sizeof(unsigned char), &dockpars.num_of_genes);
// Other kernel args are configured at every docking run
kernel_prng_ls_float.setArg(1, dockpars.num_of_genes);
#if !defined(SW_EMU)
setKernelArg(kernel_prng_ls_float,2, sizeof(mem_dummy), &mem_dummy);
kernel_prng_ls_float.setArg(2, mem_dummy);
#endif
#endif
#endif // End of ENABLE_KRNL_PRNG_LS_FLOAT
#ifdef ENABLE_KRNL_PRNG_LS2_FLOAT
setKernelArg(kernel_prng_ls2_float,1, sizeof(unsigned char), &dockpars.num_of_genes);
// Other kernel args are configured at every docking run
kernel_prng_ls2_float.setArg(1, dockpars.num_of_genes);
#if !defined(SW_EMU)
setKernelArg(kernel_prng_ls2_float,2, sizeof(mem_dummy), &mem_dummy);
kernel_prng_ls2_float.setArg(2, mem_dummy);
#endif
#endif
#endif // End of ENABLE_KRNL_PRNG_LS2_FLOAT
#ifdef ENABLE_KRNL_PRNG_LS3_FLOAT
setKernelArg(kernel_prng_ls3_float,1, sizeof(unsigned char), &dockpars.num_of_genes);
// Other kernel args are configured at every docking run
kernel_prng_ls3_float.setArg(1, dockpars.num_of_genes);
#if !defined(SW_EMU)
setKernelArg(kernel_prng_ls3_float,2, sizeof(mem_dummy), &mem_dummy);
kernel_prng_ls3_float.setArg(2, mem_dummy);
#endif
#endif
#endif // End of ENABLE_KRNL_PRNG_LS3_FLOAT
#ifdef ENABLE_KRNL_PRNG_LS4_FLOAT
setKernelArg(kernel_prng_ls4_float,1, sizeof(unsigned char), &dockpars.num_of_genes);
// Other kernel args are configured at every docking run
kernel_prng_ls4_float.setArg(1, dockpars.num_of_genes);
#if !defined(SW_EMU)
setKernelArg(kernel_prng_ls4_float,2, sizeof(mem_dummy), &mem_dummy);
kernel_prng_ls4_float.setArg(2, mem_dummy);
#endif
#endif
#endif // End of ENABLE_KRNL_PRNG_LS4_FLOAT
#ifdef ENABLE_KRNL_PRNG_LS5_FLOAT
setKernelArg(kernel_prng_ls5_float,1, sizeof(unsigned char), &dockpars.num_of_genes);
// Other kernel args are configured at every docking run
kernel_prng_ls5_float.setArg(1, dockpars.num_of_genes);
#if !defined(SW_EMU)
setKernelArg(kernel_prng_ls5_float,2, sizeof(mem_dummy), &mem_dummy);
kernel_prng_ls5_float.setArg(2, mem_dummy);
#endif
#endif
#endif // End of ENABLE_KRNL_PRNG_LS5_FLOAT
#ifdef ENABLE_KRNL_PRNG_LS6_FLOAT