Commit b2436392 authored by Leonardo Solis's avatar Leonardo Solis

issue #35: merged E+G + updated fire & adadelta kernels

parent 2ae385a1
......@@ -21,10 +21,53 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
// IMPORTANT: The following block contains definitions
// already made either in energy or gradient calculation files.
// For that reason, these are commented here.
#if 0
//#define DEBUG_ENERGY_KERNEL
/*
#include "calcenergy_basic.h"
*/
typedef struct
{
float atom_charges_const[MAX_NUM_OF_ATOMS];
char atom_types_const [MAX_NUM_OF_ATOMS];
} kernelconstant_interintra;
typedef struct
{
char intraE_contributors_const[3*MAX_INTRAE_CONTRIBUTORS];
} kernelconstant_intracontrib;
typedef struct
{
float reqm_const [ATYPE_NUM];
float reqm_hbond_const [ATYPE_NUM];
unsigned int atom1_types_reqm_const [ATYPE_NUM];
unsigned int atom2_types_reqm_const [ATYPE_NUM];
float VWpars_AC_const [MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES];
float VWpars_BD_const [MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES];
float dspars_S_const [MAX_NUM_OF_ATYPES];
float dspars_V_const [MAX_NUM_OF_ATYPES];
} kernelconstant_intra;
typedef struct
{
int rotlist_const [MAX_NUM_OF_ROTATIONS];
} kernelconstant_rotlist;
typedef struct
{
float ref_coords_x_const[MAX_NUM_OF_ATOMS];
float ref_coords_y_const[MAX_NUM_OF_ATOMS];
float ref_coords_z_const[MAX_NUM_OF_ATOMS];
float rotbonds_moving_vectors_const[3*MAX_NUM_OF_ROTBONDS];
float rotbonds_unit_vectors_const [3*MAX_NUM_OF_ROTBONDS];
float ref_orientation_quats_const [4*MAX_NUM_OF_RUNS];
} kernelconstant_conform;
// All related pragmas are in defines.h (accesible by host and device code)
// The GPU device function calculates the energy's gradient (forces or derivatives)
......@@ -34,9 +77,42 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
// whose population includes the current entity (which can be determined with get_group_id(0)),
// since this determines which reference orientation should be used.
//#define DEBUG_GRAD_TRANSLATION_GENES
//#define DEBUG_GRAD_ROTATION_GENES
//#define DEBUG_GRAD_TORSION_GENES
//#define PRINT_GRAD_TRANSLATION_GENES
//#define PRINT_GRAD_ROTATION_GENES
//#define PRINT_GRAD_TORSION_GENES
#define ENABLE_PARALLEL_GRAD_TORSION
// The following is a scaling of gradients.
// Initially all genotypes and gradients
// were expressed in grid-units (translations)
// and sexagesimal degrees (rotation and torsion angles).
// Expressing them using angstroms / radians
// might help gradient-based minimizers.
// This conversion is applied to final gradients.
#define CONVERT_INTO_ANGSTROM_RADIAN
// Scaling factor to multiply the gradients of
// the genes expressed in degrees (all genes except the first three)
// (GRID-SPACING * GRID-SPACING) / (DEG_TO_RAD * DEG_TO_RAD) = 461.644
#define SCFACTOR_ANGSTROM_RADIAN ((0.375 * 0.375)/(DEG_TO_RAD * DEG_TO_RAD))
void map_priv_angle(float* angle)
// The GPU device function maps
// the input parameter to the interval 0...360
// (supposing that it is an angle).
{
while (*angle >= 360.0f) {
*angle -= 360.0f;
}
while (*angle < 0.0f) {
*angle += 360.0f;
}
}
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
// Atomic operations used in gradients of intra contributors.
// Only atomic_cmpxchg() works on floats.
......@@ -73,8 +149,14 @@ void atomicSub_g_f(volatile __local float *addr, float val)
current.u32 = atomic_cmpxchg( (volatile __local unsigned int *)addr, expected.u32, next.u32);
} while( current.u32 != expected.u32 );
}
#endif
void gpu_calc_EG(
// IMPORTANT: the code of gradient calculation was the initial template.
// Then, statements corresponding to enery calculations were added gradually.
// The latter can be distinguised this way: they are place within lines without indentation.
void gpu_calc_energrad(
int dockpars_rotbondlist_length,
char dockpars_num_of_atoms,
char dockpars_gridsize_x,
......@@ -90,51 +172,39 @@ void gpu_calc_EG(
float dockpars_coeff_elec,
float dockpars_qasp,
float dockpars_coeff_desolv,
float dockpars_smooth,
// Some OpenCL compilers don't allow declaring
// local variables within non-kernel functions.
// These local variables must be declared in a kernel,
// and then passed to non-kernel functions.
// Some OpenCL compilers don't allow declaring
// local variables within non-kernel functions.
// These local variables must be declared in a kernel,
// and then passed to non-kernel functions.
__local float* genotype,
__local float* energy,
__local float* energy,
__local int* run_id,
__local float* calc_coords_x,
__local float* calc_coords_y,
__local float* calc_coords_z,
__local float* partial_energies,
#if defined (DEBUG_ENERGY_KERNEL)
__local float* partial_interE,
__local float* partial_intraE,
#endif
__local float* partial_energies,
__constant float* atom_charges_const,
__constant char* atom_types_const,
__constant char* intraE_contributors_const,
#if 0
bool debug,
#if defined (DEBUG_ENERGY_KERNEL)
__local float* partial_interE,
__local float* partial_intraE,
#endif
float dockpars_smooth,
__constant float* reqm,
__constant float* reqm_hbond,
__constant uint* atom1_types_reqm,
__constant uint* atom2_types_reqm,
__constant float* VWpars_AC_const,
__constant float* VWpars_BD_const,
__constant float* dspars_S_const,
__constant float* dspars_V_const,
__constant int* rotlist_const,
__constant float* ref_coords_x_const,
__constant float* ref_coords_y_const,
__constant float* ref_coords_z_const,
__constant float* rotbonds_moving_vectors_const,
__constant float* rotbonds_unit_vectors_const,
__constant float* ref_orientation_quats_const
__constant int* rotbonds_const,
__constant int* rotbonds_atoms_const,
__constant int* num_rotating_atoms_per_rotbond_const
__constant kernelconstant_interintra* kerconst_interintra,
__global const kernelconstant_intracontrib* kerconst_intracontrib,
__constant kernelconstant_intra* kerconst_intra,
__constant kernelconstant_rotlist* kerconst_rotlist,
__constant kernelconstant_conform* kerconst_conform
,
__constant int* rotbonds_const,
__global const int* rotbonds_atoms_const,
__constant int* num_rotating_atoms_per_rotbond_const
,
__global const float* angle_const,
__constant float* dependence_on_theta_const,
__constant float* dependence_on_rotangle_const
// Gradient-related arguments
// Calculate gradients (forces) for intermolecular energy
......@@ -150,20 +220,15 @@ void gpu_calc_EG(
__local float* gradient_intra_x,
__local float* gradient_intra_y,
__local float* gradient_intra_z,
__local float* gradient_genotype
__local float* gradient_genotype
)
//The GPU device function calculates the energy of the entity described by genotype, dockpars and the liganddata
//arrays in constant memory and returns it in the energy parameter. The parameter run_id has to be equal to the ID
//of the run whose population includes the current entity (which can be determined with blockIdx.x), since this
//determines which reference orientation should be used.
{
partial_energies[get_local_id(0)] = 0.0f;
partial_energies[get_local_id(0)] = 0.0f;
#if defined (DEBUG_ENERGY_KERNEL)
partial_interE[get_local_id(0)] = 0.0f;
partial_intraE[get_local_id(0)] = 0.0f;
#endif
#if defined (DEBUG_ENERGY_KERNEL)
partial_interE[get_local_id(0)] = 0.0f;
partial_intraE[get_local_id(0)] = 0.0f;
#endif
// Initializing gradients (forces)
// Derived from autodockdev/maps.py
......@@ -188,6 +253,17 @@ void gpu_calc_EG(
}
barrier(CLK_LOCAL_MEM_FENCE);
// Convert orientation genes from sex. to radians
float phi = genotype[3] * DEG_TO_RAD;
float theta = genotype[4] * DEG_TO_RAD;
float genrotangle = genotype[5] * DEG_TO_RAD;
float genrot_unitvec [3];
float sin_angle = native_sin(theta);
genrot_unitvec [0] = sin_angle*native_cos(phi);
genrot_unitvec [1] = sin_angle*native_sin(phi);
genrot_unitvec [2] = native_cos(theta);
uchar g1 = dockpars_gridsize_x;
uint g2 = dockpars_gridsize_x_times_y;
uint g3 = dockpars_gridsize_x_times_y_times_z;
......@@ -199,7 +275,7 @@ void gpu_calc_EG(
rotation_counter < dockpars_rotbondlist_length;
rotation_counter+=NUM_OF_THREADS_PER_BLOCK)
{
int rotation_list_element = rotlist_const[rotation_counter];
int rotation_list_element = kerconst_rotlist->rotlist_const[rotation_counter];
if ((rotation_list_element & RLIST_DUMMY_MASK) == 0) // If not dummy rotation
{
......@@ -210,9 +286,9 @@ void gpu_calc_EG(
if ((rotation_list_element & RLIST_FIRSTROT_MASK) != 0) // If first rotation of this atom
{
atom_to_rotate[0] = ref_coords_x_const[atom_id];
atom_to_rotate[1] = ref_coords_y_const[atom_id];
atom_to_rotate[2] = ref_coords_z_const[atom_id];
atom_to_rotate[0] = kerconst_conform->ref_coords_x_const[atom_id];
atom_to_rotate[1] = kerconst_conform->ref_coords_y_const[atom_id];
atom_to_rotate[2] = kerconst_conform->ref_coords_z_const[atom_id];
}
else
{
......@@ -222,57 +298,54 @@ void gpu_calc_EG(
}
// Capturing rotation vectors and angle
float rotation_unitvec[3];
float rotation_movingvec[3];
float rotation_angle;
float quatrot_left_x, quatrot_left_y, quatrot_left_z, quatrot_left_q;
float quatrot_temp_x, quatrot_temp_y, quatrot_temp_z, quatrot_temp_q;
if ((rotation_list_element & RLIST_GENROT_MASK) != 0) // If general rotation
{
// Rotational genes in the Shoemake space expressed in radians
float u1 = genotype[3];
float u2 = genotype[4];
float u3 = genotype[5];
// u1, u2, u3 should be within their valid range of [0,1]
quatrot_left_q = native_sqrt(1 - u1) * native_sin(PI_TIMES_2*u2);
quatrot_left_x = native_sqrt(1 - u1) * native_cos(PI_TIMES_2*u2);
quatrot_left_y = native_sqrt(u1) * native_sin(PI_TIMES_2*u3);
quatrot_left_z = native_sqrt(u1) * native_cos(PI_TIMES_2*u3);
rotation_unitvec[0] = genrot_unitvec[0];
rotation_unitvec[1] = genrot_unitvec[1];
rotation_unitvec[2] = genrot_unitvec[2];
rotation_movingvec[0] = genotype[0];
rotation_movingvec[1] = genotype[1];
rotation_movingvec[2] = genotype[2];
rotation_angle = genrotangle;
}
else // If rotating around rotatable bond
{
uint rotbond_id = (rotation_list_element & RLIST_RBONDID_MASK) >> RLIST_RBONDID_SHIFT;
float rotation_unitvec[3];
rotation_unitvec[0] = rotbonds_unit_vectors_const[3*rotbond_id];
rotation_unitvec[1] = rotbonds_unit_vectors_const[3*rotbond_id+1];
rotation_unitvec[2] = rotbonds_unit_vectors_const[3*rotbond_id+2];
float rotation_angle = genotype[6+rotbond_id]*DEG_TO_RAD;
rotation_unitvec[0] = kerconst_conform->rotbonds_unit_vectors_const[3*rotbond_id];
rotation_unitvec[1] = kerconst_conform->rotbonds_unit_vectors_const[3*rotbond_id+1];
rotation_unitvec[2] = kerconst_conform->rotbonds_unit_vectors_const[3*rotbond_id+2];
rotation_movingvec[0] = kerconst_conform->rotbonds_moving_vectors_const[3*rotbond_id];
rotation_movingvec[1] = kerconst_conform->rotbonds_moving_vectors_const[3*rotbond_id+1];
rotation_movingvec[2] = kerconst_conform->rotbonds_moving_vectors_const[3*rotbond_id+2];
rotation_movingvec[0] = rotbonds_moving_vectors_const[3*rotbond_id];
rotation_movingvec[1] = rotbonds_moving_vectors_const[3*rotbond_id+1];
rotation_movingvec[2] = rotbonds_moving_vectors_const[3*rotbond_id+2];
rotation_angle = genotype[6+rotbond_id]*DEG_TO_RAD;
// Performing additionally the first movement which
// is needed only if rotating around rotatable bond
atom_to_rotate[0] -= rotation_movingvec[0];
atom_to_rotate[1] -= rotation_movingvec[1];
atom_to_rotate[2] -= rotation_movingvec[2];
// Transforming torsion angles into quaternions
rotation_angle = rotation_angle * 0.5f;
float sin_angle = native_sin(rotation_angle);
quatrot_left_q = native_cos(rotation_angle);
quatrot_left_x = sin_angle*rotation_unitvec[0];
quatrot_left_y = sin_angle*rotation_unitvec[1];
quatrot_left_z = sin_angle*rotation_unitvec[2];
}
// Transforming orientation and torsion angles into quaternions
rotation_angle = rotation_angle * 0.5f;
float sin_angle = native_sin(rotation_angle);
quatrot_left_q = native_cos(rotation_angle);
quatrot_left_x = sin_angle*rotation_unitvec[0];
quatrot_left_y = sin_angle*rotation_unitvec[1];
quatrot_left_z = sin_angle*rotation_unitvec[2];
// Performing rotation
if ((rotation_list_element & RLIST_GENROT_MASK) != 0) // If general rotation,
// two rotations should be performed
......@@ -285,22 +358,22 @@ void gpu_calc_EG(
quatrot_temp_y = quatrot_left_y;
quatrot_temp_z = quatrot_left_z;
quatrot_left_q = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)]-
quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+1]-
quatrot_temp_y*ref_orientation_quats_const[4*(*run_id)+2]-
quatrot_temp_z*ref_orientation_quats_const[4*(*run_id)+3];
quatrot_left_x = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+1]+
ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_x+
quatrot_temp_y*ref_orientation_quats_const[4*(*run_id)+3]-
ref_orientation_quats_const[4*(*run_id)+2]*quatrot_temp_z;
quatrot_left_y = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+2]+
ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_y+
ref_orientation_quats_const[4*(*run_id)+1]*quatrot_temp_z-
quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+3];
quatrot_left_z = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+3]+
ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_z+
quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+2]-
ref_orientation_quats_const[4*(*run_id)+1]*quatrot_temp_y;
quatrot_left_q = quatrot_temp_q*kerconst_conform->ref_orientation_quats_const[4*(*run_id)]-
quatrot_temp_x*kerconst_conform->ref_orientation_quats_const[4*(*run_id)+1]-
quatrot_temp_y*kerconst_conform->ref_orientation_quats_const[4*(*run_id)+2]-
quatrot_temp_z*kerconst_conform->ref_orientation_quats_const[4*(*run_id)+3];
quatrot_left_x = quatrot_temp_q*kerconst_conform->ref_orientation_quats_const[4*(*run_id)+1]+
kerconst_conform->ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_x+
quatrot_temp_y*kerconst_conform->ref_orientation_quats_const[4*(*run_id)+3]-
kerconst_conform->ref_orientation_quats_const[4*(*run_id)+2]*quatrot_temp_z;
quatrot_left_y = quatrot_temp_q*kerconst_conform->ref_orientation_quats_const[4*(*run_id)+2]+
kerconst_conform->ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_y+
kerconst_conform->ref_orientation_quats_const[4*(*run_id)+1]*quatrot_temp_z-
quatrot_temp_x*kerconst_conform->ref_orientation_quats_const[4*(*run_id)+3];
quatrot_left_z = quatrot_temp_q*kerconst_conform->ref_orientation_quats_const[4*(*run_id)+3]+
kerconst_conform->ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_z+
quatrot_temp_x*kerconst_conform->ref_orientation_quats_const[4*(*run_id)+2]-
kerconst_conform->ref_orientation_quats_const[4*(*run_id)+1]*quatrot_temp_y;
}
quatrot_temp_q = 0 -
......@@ -345,32 +418,32 @@ void gpu_calc_EG(
} // End rotation_counter for-loop
// ================================================
// CALCULATING INTERMOLECULAR ENERGY
// CALCULATING INTERMOLECULAR GRADIENTS
// ================================================
for (uint atom_id = get_local_id(0);
atom_id < dockpars_num_of_atoms;
atom_id+= NUM_OF_THREADS_PER_BLOCK)
{
uint atom_typeid = atom_types_const[atom_id];
uint atom_typeid = kerconst_interintra->atom_types_const[atom_id];
float x = calc_coords_x[atom_id];
float y = calc_coords_y[atom_id];
float z = calc_coords_z[atom_id];
float q = atom_charges_const[atom_id];
float q = kerconst_interintra->atom_charges_const[atom_id];
if ((x < 0) || (y < 0) || (z < 0) || (x >= dockpars_gridsize_x-1)
|| (y >= dockpars_gridsize_y-1)
|| (z >= dockpars_gridsize_z-1)){
partial_energies[get_local_id(0)] += 16777216.0f; //100000.0f;
partial_energies[get_local_id(0)] += 16777216.0f; //100000.0f;
#if defined (DEBUG_ENERGY_KERNEL)
partial_interE[get_local_id(0)] += 16777216.0f;
#endif
// Setting gradients (forces) penalties.
// These are valid as long as they are high
gradient_inter_x[atom_id] += 16777216.0f;
gradient_inter_y[atom_id] += 16777216.0f;
gradient_inter_z[atom_id] += 16777216.0f;
#if defined (DEBUG_ENERGY_KERNEL)
partial_interE[get_local_id(0)] += 16777216.0f;
#endif
}
else
{
......@@ -385,6 +458,8 @@ void gpu_calc_EG(
float dy = y - y_low;
float dz = z - z_low;
//printf("%-15s %-5u %-10.8f %-10.8f %-10.8f\n", "dx,dy,dz", atom_id, dx, dy, dz);
// Calculating interpolation weights
float weights[2][2][2];
weights [0][0][0] = (1-dx)*(1-dy)*(1-dz);
......@@ -424,16 +499,12 @@ void gpu_calc_EG(
cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp);
cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp);
// Calculating affinity energy
partial_energies[get_local_id(0)] += TRILININTERPOL(cube, weights);
#if defined (DEBUG_ENERGY_KERNEL)
partial_interE[get_local_id(0)] += TRILININTERPOL(cube, weights);
#endif
// Calculating affinity energy
partial_energies[get_local_id(0)] += TRILININTERPOL(cube, weights);
#if defined (DEBUG_ENERGY_KERNEL)
partial_interE[get_local_id(0)] += TRILININTERPOL(cube, weights);
#endif
// -------------------------------------------------------------------
// Deltas dx, dy, dz are already normalized
......@@ -505,40 +576,12 @@ void gpu_calc_EG(
//printf("%-15s %-3u %-10.8f %-10.8f %-10.8f %-10.8f %-10.8f %-10.8f\n", "atom aff", atom_id, vx_z0, vx_z1, vy_z0, vy_z1, vz_y0, vz_y1);
// Capturing electrostatic values
atom_typeid = dockpars_num_of_atypes;
mul_tmp = atom_typeid*g3;
cube [0][0][0] = *(dockpars_fgrids + offset_cube_000 + mul_tmp);
cube [1][0][0] = *(dockpars_fgrids + offset_cube_100 + mul_tmp);
cube [0][1][0] = *(dockpars_fgrids + offset_cube_010 + mul_tmp);
cube [1][1][0] = *(dockpars_fgrids + offset_cube_110 + mul_tmp);
cube [0][0][1] = *(dockpars_fgrids + offset_cube_001 + mul_tmp);
cube [1][0][1] = *(dockpars_fgrids + offset_cube_101 + mul_tmp);
cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp);
cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp);
// Calculating electrostatic energy
partial_energies[get_local_id(0)] += q * TRILININTERPOL(cube, weights);
#if defined (DEBUG_ENERGY_KERNEL)
partial_interE[get_local_id(0)] += q * TRILININTERPOL(cube, weights);
#endif
// -------------------------------------------------------------------
// Calculating gradients (forces) corresponding to
// "elec" intermolecular energy
// Derived from autodockdev/maps.py
// -------------------------------------------------------------------
/*
// Capturing electrostatic values
atom_typeid = dockpars_num_of_atypes;
......@@ -551,7 +594,14 @@ void gpu_calc_EG(
cube [1][0][1] = *(dockpars_fgrids + offset_cube_101 + mul_tmp);
cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp);
cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp);
*/
// Calculating electrostatic energy
partial_energies[get_local_id(0)] += q * TRILININTERPOL(cube, weights);
#if defined (DEBUG_ENERGY_KERNEL)
partial_interE[get_local_id(0)] += q * TRILININTERPOL(cube, weights);
#endif
// Vector in x-direction
x10 = cube [1][0][0] - cube [0][0][0]; // z = 0
x52 = cube [1][1][0] - cube [0][1][0]; // z = 0
......@@ -581,40 +631,12 @@ void gpu_calc_EG(
//printf("%-15s %-3u %-10.8f %-10.8f %-10.8f %-10.8f %-10.8f %-10.8f\n", "elec", atom_id, vx_z0, vx_z1, vy_z0, vy_z1, vz_y0, vz_y1);
// Capturing desolvation values
atom_typeid = dockpars_num_of_atypes+1;
mul_tmp = atom_typeid*g3;
cube [0][0][0] = *(dockpars_fgrids + offset_cube_000 + mul_tmp);
cube [1][0][0] = *(dockpars_fgrids + offset_cube_100 + mul_tmp);
cube [0][1][0] = *(dockpars_fgrids + offset_cube_010 + mul_tmp);
cube [1][1][0] = *(dockpars_fgrids + offset_cube_110 + mul_tmp);
cube [0][0][1] = *(dockpars_fgrids + offset_cube_001 + mul_tmp);
cube [1][0][1] = *(dockpars_fgrids + offset_cube_101 + mul_tmp);
cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp);
cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp);
// Calculating desolvation energy
partial_energies[get_local_id(0)] += fabs(q) * TRILININTERPOL(cube, weights);
#if defined (DEBUG_ENERGY_KERNEL)
partial_interE[get_local_id(0)] += fabs(q) * TRILININTERPOL(cube, weights);
#endif
// -------------------------------------------------------------------
// Calculating gradients (forces) corresponding to
// "dsol" intermolecular energy
// Derived from autodockdev/maps.py
// -------------------------------------------------------------------
/*
// Capturing desolvation values
atom_typeid = dockpars_num_of_atypes+1;
......@@ -627,7 +649,14 @@ void gpu_calc_EG(
cube [1][0][1] = *(dockpars_fgrids + offset_cube_101 + mul_tmp);
cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp);
cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp);
*/
// Calculating desolvation energy
partial_energies[get_local_id(0)] += fabs(q) * TRILININTERPOL(cube, weights);
#if defined (DEBUG_ENERGY_KERNEL)
partial_interE[get_local_id(0)] += fabs(q) * TRILININTERPOL(cube, weights);
#endif
// Vector in x-direction
x10 = cube [1][0][0] - cube [0][0][0]; // z = 0
x52 = cube [1][1][0] - cube [0][1][0]; // z = 0
......@@ -662,79 +691,68 @@ void gpu_calc_EG(
} // End atom_id for-loop (INTERMOLECULAR ENERGY)
#if defined (DEBUG_ENERGY_KERNEL)
barrier(CLK_LOCAL_MEM_FENCE);
#if defined (DEBUG_ENERGY_KERNEL)
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) == 0)
{
float energy_interE = partial_interE[0];
if (get_local_id(0) == 0)
{
float energy_interE = partial_interE[0];
for (uint contributor_counter=1;
contributor_counter<NUM_OF_THREADS_PER_BLOCK;
contributor_counter++)
{
energy_interE += partial_interE[contributor_counter];
}
partial_interE[0] = energy_interE;
//printf("%-20s %-10.8f\n", "energy_interE: ", energy_interE);
for (uint contributor_counter=1;
contributor_counter<NUM_OF_THREADS_PER_BLOCK;
contributor_counter++)
{
energy_interE += partial_interE[contributor_counter];
}
partial_interE[0] = energy_interE;
//printf("%-20s %-10.8f\n", "energy_interE: ", energy_interE);
}
barrier(CLK_LOCAL_MEM_FENCE);
#endif
barrier(CLK_LOCAL_MEM_FENCE);
#endif
// In paper: intermolecular and internal energy calculation
// are independent from each other, -> NO BARRIER NEEDED
// but require different operations,
// thus, they can be executed only sequentially on the GPU.
// Inter- and intra-molecular energy calculation
// are independent from each other, so NO barrier is needed here.
// As these two require different operations,
// they can be executed only sequentially on the GPU.
// ================================================
// CALCULATING INTRAMOLECULAR ENERGY
// CALCULATING INTRAMOLECULAR GRADIENTS
// ================================================
for (uint contributor_counter = get_local_id(0);
contributor_counter < dockpars_num_of_intraE_contributors;
contributor_counter +=NUM_OF_THREADS_PER_BLOCK)
#if 0
if (get_local_id (0) == 0) {
for (uint contributor_counter = 0;
contributor_counter < dockpars_num_of_intraE_contributors;
contributor_counter ++)
#endif
contributor_counter+= NUM_OF_THREADS_PER_BLOCK)
{
#if 0
// Only for testing smoothing
float smoothed_intraE = 0.0f;
float raw_intraE_vdw_hb = 0.0f;
float raw_intraE_el = 0.0f;
float raw_intraE_sol = 0.0f;
float raw_intraE = 0.0f;
#endif
// Storing in a private variable
// the gradient contribution of each contributing atomic pair
float priv_gradient_per_intracontributor= 0.0f;
// Getting atom IDs
uint atom1_id = intraE_contributors_const[3*contributor_counter];
uint atom2_id = intraE_contributors_const[3*contributor_counter+1];