Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
docking
ocladock
Commits
9958035f
Commit
9958035f
authored
Sep 27, 2017
by
Leonardo Solis
Browse files
added kernel-stringify support
parent
c050ab5f
Changes
7
Hide whitespace changes
Inline
Side-by-side
.gitignore
View file @
9958035f
...
...
@@ -2,7 +2,7 @@
initpop.txt
*.dlg
*.xml
input/
2bxd
/
input/
albumin_dock
/
ocladock.wiki/
# ===================
...
...
Makefile
View file @
9958035f
#
oclad
ock Makefile
#
OCLAD
ock Makefile
# CPU config
INTEL_INCLUDE_PATH
=
$(INTELOCLSDKROOT)
/include
...
...
@@ -128,7 +128,10 @@ endif
all
:
odock
odock
:
$(SRC)
stringify
:
./stringify_ocl_krnls.sh
odock
:
stringify $(SRC)
g++
$(SRC)
$(CFLAGS)
-lOpenCL
-o
$(BIN_DIR)
/
$(TARGET)
$(DEV)
$(NWI)
$(OPT)
$(DD)
$(REP)
$(KFLAGS)
clean
:
...
...
bin/ocladock_gpu_64wi
View file @
9958035f
No preview for this file type
host/inc/stringify.h
0 → 100644
View file @
9958035f
// OCLADOCK: AUTOMATICALLY GENERATED FILE, DO NOT EDIT
#ifndef STRINGIFY_H
#define STRINGIFY_H
const
char
*
calcenergy_ocl
=
"/*
\n
"
" * (C) 2013. Evopro Innovation Kft.
\n
"
" *
\n
"
" * defines.h
\n
"
" *
\n
"
" * Created on: 2009.05.29.
\n
"
" * Author: pechan.imre
\n
"
" */
\n
"
"
\n
"
"#ifndef DEFINES_H_
\n
"
"#define DEFINES_H_
\n
"
"
\n
"
"#if defined (N16WI)
\n
"
" #define NUM_OF_THREADS_PER_BLOCK 16
\n
"
"#elif defined (N32WI)
\n
"
" #define NUM_OF_THREADS_PER_BLOCK 32
\n
"
"#elif defined (N64WI)
\n
"
" #define NUM_OF_THREADS_PER_BLOCK 64
\n
"
"#elif defined (N128WI)
\n
"
" #define NUM_OF_THREADS_PER_BLOCK 128
\n
"
"#else
\n
"
" #define NUM_OF_THREADS_PER_BLOCK 64
\n
"
"#endif
\n
"
"
\n
"
"#define MAX_NUM_OF_ATOMS 90
\n
"
"#define MAX_NUM_OF_ATYPES 14
\n
"
"#define MAX_INTRAE_CONTRIBUTORS 8128
\n
"
"#define MAX_NUM_OF_ROTATIONS 4096
\n
"
"#define MAX_NUM_OF_ROTBONDS 32
\n
"
"#define MAX_POPSIZE 2048
\n
"
"#define MAX_NUM_OF_RUNS 1000
\n
"
"
\n
"
"// Must be bigger than MAX_NUM_OF_ROTBONDS+6
\n
"
"#define GENOTYPE_LENGTH_IN_GLOBMEM 64
\n
"
"#define ACTUAL_GENOTYPE_LENGTH (MAX_NUM_OF_ROTBONDS+6)
\n
"
"
\n
"
"#define LS_EXP_FACTOR 2.0f
\n
"
"#define LS_CONT_FACTOR 0.5f
\n
"
"
\n
"
"// Improvements over Pechan's implementation
\n
"
"#define NATIVE_PRECISION
\n
"
"#define ASYNC_COPY
\n
"
"#define IMPROVE_GRID
\n
"
"#define RESTRICT_ARGS
\n
"
"#define MAPPED_COPY
\n
"
"
\n
"
"#endif /* DEFINES_H_ */
\n
"
"#ifndef CALCENERGY_BASIC_H_
\n
"
"#define CALCENERGY_BASIC_H_
\n
"
"
\n
"
"
\n
"
"#define RLIST_ATOMID_MASK 0x000000FF
\n
"
"#define RLIST_RBONDID_MASK 0x0000FF00
\n
"
"#define RLIST_RBONDID_SHIFT 8
\n
"
"#define RLIST_FIRSTROT_MASK 0x00010000
\n
"
"#define RLIST_GENROT_MASK 0x00020000
\n
"
"#define RLIST_DUMMY_MASK 0x00040000
\n
"
"#define DEG_TO_RAD 0.0174533f
\n
"
"
\n
"
"// LCG: linear congruential generator constants
\n
"
"#define RAND_A 1103515245u
\n
"
"#define RAND_C 12345u
\n
"
"// WARNING: it is supposed that unsigned int is 32 bit long
\n
"
"#define MAX_UINT 4294967296.0f
\n
"
"
\n
"
"// Macro for capturing grid values
\n
"
" // Original
\n
"
" #define GETGRIDVALUE(mempoi,gridsize_x,gridsize_y,gridsize_z,t,z,y,x) *(mempoi + gridsize_x*(y + gridsize_y*(z + gridsize_z*t)) + x)
\n
"
"
\n
"
" // Optimization 1
\n
"
" // #define GETGRIDVALUE_OPT(mempoi,gridsize_x,gridsize_y,mul_tmp,z,y,x) *(mempoi + gridsize_x*(y + gridsize_y*(z + mul_tmp)) + x)
\n
"
"
\n
"
" // Optimization 2
\n
"
" // Implemented direclty in the kernel code: calcenergy_fourkernels_intel.cl
\n
"
"
\n
"
"// Macro for trilinear interpolation
\n
"
"#define TRILININTERPOL(cube, weights) (cube[0][0][0]*weights[0][0][0] +cube[1][0][0]*weights[1][0][0] +
\\\n
"
" cube[0][1][0]*weights[0][1][0] +cube[1][1][0]*weights[1][1][0] +
\\\n
"
" cube[0][0][1]*weights[0][0][1] +cube[1][0][1]*weights[1][0][1] +
\\\n
"
" cube[0][1][1]*weights[0][1][1] +cube[1][1][1]*weights[1][1][1])
\n
"
"
\n
"
"#endif /* CALCENERGY_BASIC_H_ */
\n
"
"
\n
"
"// All related pragmas are in defines.h (accesible by host and device code)
\n
"
"
\n
"
"void gpu_calc_energy( int dockpars_rotbondlist_length,
\n
"
" char dockpars_num_of_atoms,
\n
"
" char dockpars_gridsize_x,
\n
"
" char dockpars_gridsize_y,
\n
"
" char dockpars_gridsize_z,
\n
"
" #if defined (RESTRICT_ARGS)
\n
"
" __global const float* restrict dockpars_fgrids, // cannot be allocated in __constant (too large)
\n
"
" #else
\n
"
" __global const float* dockpars_fgrids, // cannot be allocated in __constant (too large)
\n
"
" #endif
\n
"
" char dockpars_num_of_atypes,
\n
"
" int dockpars_num_of_intraE_contributors,
\n
"
" float dockpars_grid_spacing,
\n
"
" float dockpars_coeff_elec,
\n
"
" float dockpars_qasp,
\n
"
" float dockpars_coeff_desolv,
\n
"
"
\n
"
" __local float* genotype,
\n
"
" __local float* energy,
\n
"
" __local int* run_id,
\n
"
"
\n
"
" // Some OpenCL compilers don't allow local var outside kernels
\n
"
" // so this local vars are passed from a kernel
\n
"
" __local float* calc_coords_x,
\n
"
" __local float* calc_coords_y,
\n
"
" __local float* calc_coords_z,
\n
"
" __local float* partial_energies,
\n
"
"
\n
"
" __constant float* atom_charges_const,
\n
"
" __constant char* atom_types_const,
\n
"
" __constant char* intraE_contributors_const,
\n
"
" __constant float* VWpars_AC_const,
\n
"
" __constant float* VWpars_BD_const,
\n
"
" __constant float* dspars_S_const,
\n
"
" __constant float* dspars_V_const,
\n
"
" __constant int* rotlist_const,
\n
"
" __constant float* ref_coords_x_const,
\n
"
" __constant float* ref_coords_y_const,
\n
"
" __constant float* ref_coords_z_const,
\n
"
" __constant float* rotbonds_moving_vectors_const,
\n
"
" __constant float* rotbonds_unit_vectors_const,
\n
"
" __constant float* ref_orientation_quats_const
\n
"
")
\n
"
"
\n
"
"//The GPU device function calculates the energy of the entity described by genotype, dockpars and the liganddata
\n
"
"//arrays in constant memory and returns it in the energy parameter. The parameter run_id has to be equal to the ID
\n
"
"//of the run whose population includes the current entity (which can be determined with blockIdx.x), since this
\n
"
"//determines which reference orientation should be used.
\n
"
"{
\n
"
" int contributor_counter;
\n
"
" char atom1_id, atom2_id, atom1_typeid, atom2_typeid;
\n
"
"
\n
"
" // Name changed to distance_leo to avoid
\n
"
" // errors as
\"
distance
\"
is the name of OpenCL function
\n
"
" //float subx, suby, subz, distance;
\n
"
" float subx, suby, subz, distance_leo;
\n
"
"
\n
"
" float x, y, z, dx, dy, dz, q;
\n
"
" float cube[2][2][2];
\n
"
" float weights[2][2][2];
\n
"
" int x_low, x_high, y_low, y_high, z_low, z_high;
\n
"
"
\n
"
" float phi, theta, genrotangle, rotation_angle, sin_angle;
\n
"
" float genrot_unitvec[3], rotation_unitvec[3], rotation_movingvec[3];
\n
"
" int rotation_counter, rotation_list_element;
\n
"
" float atom_to_rotate[3];
\n
"
" int atom_id, rotbond_id;
\n
"
" float quatrot_left_x, quatrot_left_y, quatrot_left_z, quatrot_left_q;
\n
"
" float quatrot_temp_x, quatrot_temp_y, quatrot_temp_z, quatrot_temp_q;
\n
"
"
\n
"
" // Some OpenCL compilers don't allow local var outside kernels
\n
"
" // so this local vars are passed from a kernel
\n
"
" //__local float calc_coords_x[MAX_NUM_OF_ATOMS];
\n
"
" //__local float calc_coords_y[MAX_NUM_OF_ATOMS];
\n
"
" //__local float calc_coords_z[MAX_NUM_OF_ATOMS];
\n
"
" //__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];
\n
"
"
\n
"
" partial_energies[get_local_id(0)] = 0.0f;
\n
"
"
\n
"
" //CALCULATE CONFORMATION
\n
"
"
\n
"
" //calculate vectors for general rotation
\n
"
" phi = genotype[3]*DEG_TO_RAD;
\n
"
" theta = genotype[4]*DEG_TO_RAD;
\n
"
" genrotangle = genotype[5]*DEG_TO_RAD;
\n
"
"
\n
"
"#if defined (IMPROVE_GRID)
\n
"
"
\n
"
" #if defined (NATIVE_PRECISION)
\n
"
" sin_angle = native_sin(theta);
\n
"
" genrot_unitvec [0] = sin_angle*native_cos(phi);
\n
"
" genrot_unitvec [1] = sin_angle*native_sin(phi);
\n
"
" genrot_unitvec [2] = native_cos(theta);
\n
"
" #elif defined (HALF_PRECISION)
\n
"
" sin_angle = half_sin(theta);
\n
"
" genrot_unitvec [0] = sin_angle*half_cos(phi);
\n
"
" genrot_unitvec [1] = sin_angle*half_sin(phi);
\n
"
" genrot_unitvec [2] = half_cos(theta);
\n
"
" #else // Full precision
\n
"
" sin_angle = sin(theta);
\n
"
" genrot_unitvec [0] = sin_angle*cos(phi);
\n
"
" genrot_unitvec [1] = sin_angle*sin(phi);
\n
"
" genrot_unitvec [2] = cos(theta);
\n
"
" #endif
\n
"
"
\n
"
" // INTERMOLECULAR for-loop (intermediate results)
\n
"
" // It stores a product of two chars
\n
"
" unsigned int mul_tmp;
\n
"
"
\n
"
" unsigned char g1 = dockpars_gridsize_x;
\n
"
" unsigned int g2 = dockpars_gridsize_x * dockpars_gridsize_y;
\n
"
" unsigned int g3 = dockpars_gridsize_x * dockpars_gridsize_y * dockpars_gridsize_z;
\n
"
"
\n
"
" unsigned int ylow_times_g1, yhigh_times_g1;
\n
"
" unsigned int zlow_times_g2, zhigh_times_g2;
\n
"
"
\n
"
" unsigned int cube_000;
\n
"
" unsigned int cube_100;
\n
"
" unsigned int cube_010;
\n
"
" unsigned int cube_110;
\n
"
" unsigned int cube_001;
\n
"
" unsigned int cube_101;
\n
"
" unsigned int cube_011;
\n
"
" unsigned int cube_111;
\n
"
"
\n
"
"#else
\n
"
" sin_angle = sin(theta);
\n
"
" genrot_unitvec [0] = sin_angle*cos(phi);
\n
"
" genrot_unitvec [1] = sin_angle*sin(phi);
\n
"
" genrot_unitvec [2] = cos(theta);
\n
"
"#endif
\n
"
"
\n
"
" // ================================================
\n
"
" // Iterating over elements of rotation list
\n
"
" // ================================================
\n
"
" for (rotation_counter = get_local_id(0);
\n
"
" rotation_counter < dockpars_rotbondlist_length;
\n
"
" rotation_counter+=NUM_OF_THREADS_PER_BLOCK)
\n
"
" {
\n
"
" rotation_list_element = rotlist_const[rotation_counter];
\n
"
"
\n
"
" if ((rotation_list_element & RLIST_DUMMY_MASK) == 0) //if not dummy rotation
\n
"
" {
\n
"
" atom_id = rotation_list_element & RLIST_ATOMID_MASK;
\n
"
"
\n
"
" //capturing atom coordinates
\n
"
" if ((rotation_list_element & RLIST_FIRSTROT_MASK) != 0) //if firts rotation of this atom
\n
"
" {
\n
"
" atom_to_rotate[0] = ref_coords_x_const[atom_id];
\n
"
" atom_to_rotate[1] = ref_coords_y_const[atom_id];
\n
"
" atom_to_rotate[2] = ref_coords_z_const[atom_id];
\n
"
" }
\n
"
" else
\n
"
" {
\n
"
" atom_to_rotate[0] = calc_coords_x[atom_id];
\n
"
" atom_to_rotate[1] = calc_coords_y[atom_id];
\n
"
" atom_to_rotate[2] = calc_coords_z[atom_id];
\n
"
" }
\n
"
"
\n
"
" //capturing rotation vectors and angle
\n
"
" if ((rotation_list_element & RLIST_GENROT_MASK) != 0) //if general rotation
\n
"
" {
\n
"
" rotation_unitvec[0] = genrot_unitvec[0];
\n
"
" rotation_unitvec[1] = genrot_unitvec[1];
\n
"
" rotation_unitvec[2] = genrot_unitvec[2];
\n
"
"
\n
"
" rotation_angle = genrotangle;
\n
"
"
\n
"
" rotation_movingvec[0] = genotype[0];
\n
"
" rotation_movingvec[1] = genotype[1];
\n
"
" rotation_movingvec[2] = genotype[2];
\n
"
" }
\n
"
" else //if rotating around rotatable bond
\n
"
" {
\n
"
" rotbond_id = (rotation_list_element & RLIST_RBONDID_MASK) >> RLIST_RBONDID_SHIFT;
\n
"
"
\n
"
" rotation_unitvec[0] = rotbonds_unit_vectors_const[3*rotbond_id];
\n
"
" rotation_unitvec[1] = rotbonds_unit_vectors_const[3*rotbond_id+1];
\n
"
" rotation_unitvec[2] = rotbonds_unit_vectors_const[3*rotbond_id+2];
\n
"
" rotation_angle = genotype[6+rotbond_id]*DEG_TO_RAD;
\n
"
"
\n
"
" rotation_movingvec[0] = rotbonds_moving_vectors_const[3*rotbond_id];
\n
"
" rotation_movingvec[1] = rotbonds_moving_vectors_const[3*rotbond_id+1];
\n
"
" rotation_movingvec[2] = rotbonds_moving_vectors_const[3*rotbond_id+2];
\n
"
"
\n
"
" //in addition, performing the first movement which is needed only if rotating around rotatable bond
\n
"
" atom_to_rotate[0] -= rotation_movingvec[0];
\n
"
" atom_to_rotate[1] -= rotation_movingvec[1];
\n
"
" atom_to_rotate[2] -= rotation_movingvec[2];
\n
"
" }
\n
"
"
\n
"
" //performing rotation
\n
"
"
\n
"
"#if defined (NATIVE_PRECISION)
\n
"
" rotation_angle = native_divide(rotation_angle,2);
\n
"
" quatrot_left_q = native_cos(rotation_angle);
\n
"
" sin_angle = native_sin(rotation_angle);
\n
"
"#elif defined (HALF_PRECISION)
\n
"
" rotation_angle = half_divide(rotation_angle,2);
\n
"
" quatrot_left_q = half_cos(rotation_angle);
\n
"
" sin_angle = half_sin(rotation_angle);
\n
"
"#else // Full precision
\n
"
" rotation_angle = rotation_angle/2;
\n
"
" quatrot_left_q = cos(rotation_angle);
\n
"
" sin_angle = sin(rotation_angle);
\n
"
"#endif
\n
"
" quatrot_left_x = sin_angle*rotation_unitvec[0];
\n
"
" quatrot_left_y = sin_angle*rotation_unitvec[1];
\n
"
" quatrot_left_z = sin_angle*rotation_unitvec[2];
\n
"
"
\n
"
" if ((rotation_list_element & RLIST_GENROT_MASK) != 0) // if general rotation,
\n
"
" // two rotations should be performed
\n
"
" // (multiplying the quaternions)
\n
"
" {
\n
"
" //calculating quatrot_left*ref_orientation_quats_const,
\n
"
" //which means that reference orientation rotation is the first
\n
"
" quatrot_temp_q = quatrot_left_q;
\n
"
" quatrot_temp_x = quatrot_left_x;
\n
"
" quatrot_temp_y = quatrot_left_y;
\n
"
" quatrot_temp_z = quatrot_left_z;
\n
"
"
\n
"
" quatrot_left_q = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)]-
\n
"
" quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+1]-
\n
"
" quatrot_temp_y*ref_orientation_quats_const[4*(*run_id)+2]-
\n
"
" quatrot_temp_z*ref_orientation_quats_const[4*(*run_id)+3];
\n
"
" quatrot_left_x = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+1]+
\n
"
" ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_x+
\n
"
" quatrot_temp_y*ref_orientation_quats_const[4*(*run_id)+3]-
\n
"
" ref_orientation_quats_const[4*(*run_id)+2]*quatrot_temp_z;
\n
"
" quatrot_left_y = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+2]+
\n
"
" ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_y+
\n
"
" ref_orientation_quats_const[4*(*run_id)+1]*quatrot_temp_z-
\n
"
" quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+3];
\n
"
" quatrot_left_z = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+3]+
\n
"
" ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_z+
\n
"
" quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+2]-
\n
"
" ref_orientation_quats_const[4*(*run_id)+1]*quatrot_temp_y;
\n
"
"
\n
"
" }
\n
"
"
\n
"
" quatrot_temp_q = 0 -
\n
"
" quatrot_left_x*atom_to_rotate [0] -
\n
"
" quatrot_left_y*atom_to_rotate [1] -
\n
"
" quatrot_left_z*atom_to_rotate [2];
\n
"
" quatrot_temp_x = quatrot_left_q*atom_to_rotate [0] +
\n
"
" quatrot_left_y*atom_to_rotate [2] -
\n
"
" quatrot_left_z*atom_to_rotate [1];
\n
"
" quatrot_temp_y = quatrot_left_q*atom_to_rotate [1] -
\n
"
" quatrot_left_x*atom_to_rotate [2] +
\n
"
" quatrot_left_z*atom_to_rotate [0];
\n
"
" quatrot_temp_z = quatrot_left_q*atom_to_rotate [2] +
\n
"
" quatrot_left_x*atom_to_rotate [1] -
\n
"
" quatrot_left_y*atom_to_rotate [0];
\n
"
"
\n
"
" atom_to_rotate [0] = 0 -
\n
"
" quatrot_temp_q*quatrot_left_x +
\n
"
" quatrot_temp_x*quatrot_left_q -
\n
"
" quatrot_temp_y*quatrot_left_z +
\n
"
" quatrot_temp_z*quatrot_left_y;
\n
"
" atom_to_rotate [1] = 0 -
\n
"
" quatrot_temp_q*quatrot_left_y +
\n
"
" quatrot_temp_x*quatrot_left_z +
\n
"
" quatrot_temp_y*quatrot_left_q -
\n
"
" quatrot_temp_z*quatrot_left_x;
\n
"
" atom_to_rotate [2] = 0 -
\n
"
" quatrot_temp_q*quatrot_left_z -
\n
"
" quatrot_temp_x*quatrot_left_y +
\n
"
" quatrot_temp_y*quatrot_left_x +
\n
"
" quatrot_temp_z*quatrot_left_q;
\n
"
"
\n
"
" //performing final movement and storing values
\n
"
" calc_coords_x[atom_id] = atom_to_rotate [0] + rotation_movingvec[0];
\n
"
" calc_coords_y[atom_id] = atom_to_rotate [1] + rotation_movingvec[1];
\n
"
" calc_coords_z[atom_id] = atom_to_rotate [2] + rotation_movingvec[2];
\n
"
"
\n
"
" } // End if-statement not dummy rotation
\n
"
"
\n
"
" barrier(CLK_LOCAL_MEM_FENCE);
\n
"
"
\n
"
" } // End rotation_counter for-loop
\n
"
"
\n
"
" // ================================================
\n
"
" // CALCULATE INTERMOLECULAR ENERGY
\n
"
" // ================================================
\n
"
" for (atom1_id = get_local_id(0);
\n
"
" atom1_id < dockpars_num_of_atoms;
\n
"
" atom1_id+= NUM_OF_THREADS_PER_BLOCK)
\n
"
" {
\n
"
" atom1_typeid = atom_types_const[atom1_id];
\n
"
" x = calc_coords_x[atom1_id];
\n
"
" y = calc_coords_y[atom1_id];
\n
"
" z = calc_coords_z[atom1_id];
\n
"
" q = atom_charges_const[atom1_id];
\n
"
"
\n
"
" if ((x < 0) || (y < 0) || (z < 0) || (x >= dockpars_gridsize_x-1)
\n
"
" || (y >= dockpars_gridsize_y-1)
\n
"
" || (z >= dockpars_gridsize_z-1)){
\n
"
" partial_energies[get_local_id(0)] += 16777216.0f; //100000.0f;
\n
"
" }
\n
"
" else
\n
"
" {
\n
"
" //get coordinates
\n
"
" x_low = (int)floor(x); y_low = (int)floor(y); z_low = (int)floor(z);
\n
"
" x_high = (int)ceil(x); y_high = (int)ceil(y); z_high = (int)ceil(z);
\n
"
" dx = x - x_low; dy = y - y_low; dz = z - z_low;
\n
"
"
\n
"
" //calculate interpolation weights
\n
"
" weights [0][0][0] = (1-dx)*(1-dy)*(1-dz);
\n
"
" weights [1][0][0] = dx*(1-dy)*(1-dz);
\n
"
" weights [0][1][0] = (1-dx)*dy*(1-dz);
\n
"
" weights [1][1][0] = dx*dy*(1-dz);
\n
"
" weights [0][0][1] = (1-dx)*(1-dy)*dz;
\n
"
" weights [1][0][1] = dx*(1-dy)*dz;
\n
"
" weights [0][1][1] = (1-dx)*dy*dz;
\n
"
" weights [1][1][1] = dx*dy*dz;
\n
"
"
\n
"
" //capturing affinity values
\n
"
"#if defined (IMPROVE_GRID)
\n
"
" ylow_times_g1 = y_low*g1;
\n
"
" yhigh_times_g1 = y_high*g1;
\n
"
" zlow_times_g2 = z_low*g2;
\n
"
" zhigh_times_g2 = z_high*g2;
\n
"
"
\n
"
" cube_000 = x_low + ylow_times_g1 + zlow_times_g2;
\n
"
" cube_100 = x_high + ylow_times_g1 + zlow_times_g2;
\n
"
" cube_010 = x_low + yhigh_times_g1 + zlow_times_g2;
\n
"
" cube_110 = x_high + yhigh_times_g1 + zlow_times_g2;
\n
"
" cube_001 = x_low + ylow_times_g1 + zhigh_times_g2;
\n
"
" cube_101 = x_high + ylow_times_g1 + zhigh_times_g2;
\n
"
" cube_011 = x_low + yhigh_times_g1 + zhigh_times_g2;
\n
"
" cube_111 = x_high + yhigh_times_g1 + zhigh_times_g2;
\n
"
" mul_tmp = atom1_typeid*g3;
\n
"
"
\n
"
" cube [0][0][0] = *(dockpars_fgrids + cube_000 + mul_tmp);
\n
"
" cube [1][0][0] = *(dockpars_fgrids + cube_100 + mul_tmp);
\n
"
" cube [0][1][0] = *(dockpars_fgrids + cube_010 + mul_tmp);
\n
"
" cube [1][1][0] = *(dockpars_fgrids + cube_110 + mul_tmp);
\n
"
" cube [0][0][1] = *(dockpars_fgrids + cube_001 + mul_tmp);
\n
"
" cube [1][0][1] = *(dockpars_fgrids + cube_101 + mul_tmp);
\n
"
" cube [0][1][1] = *(dockpars_fgrids + cube_011 + mul_tmp);
\n
"
" cube [1][1][1] = *(dockpars_fgrids + cube_111 + mul_tmp);
\n
"
"
\n
"
"#else
\n
"
" cube [0][0][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_low, x_low);
\n
"
" cube [1][0][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_low, x_high);
\n
"
" cube [0][1][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_high, x_low);
\n
"
" cube [1][1][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_high, x_high);
\n
"
" cube [0][0][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_low, x_low);
\n
"
" cube [1][0][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_low, x_high);
\n
"
" cube [0][1][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_high, x_low);
\n
"
" cube [1][1][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_high, x_high);
\n
"
"#endif
\n
"
"
\n
"
" //calculating affinity energy
\n
"
" partial_energies[get_local_id(0)] += TRILININTERPOL(cube, weights);
\n
"
"
\n
"
" //capturing electrostatic values
\n
"
" atom1_typeid = dockpars_num_of_atypes;
\n
"
"
\n
"
"#if defined (IMPROVE_GRID)
\n
"
" mul_tmp = atom1_typeid*g3;
\n
"
" cube [0][0][0] = *(dockpars_fgrids + cube_000 + mul_tmp);
\n
"
" cube [1][0][0] = *(dockpars_fgrids + cube_100 + mul_tmp);
\n
"
" cube [0][1][0] = *(dockpars_fgrids + cube_010 + mul_tmp);
\n
"
" cube [1][1][0] = *(dockpars_fgrids + cube_110 + mul_tmp);
\n
"
" cube [0][0][1] = *(dockpars_fgrids + cube_001 + mul_tmp);
\n
"
" cube [1][0][1] = *(dockpars_fgrids + cube_101 + mul_tmp);
\n
"
" cube [0][1][1] = *(dockpars_fgrids + cube_011 + mul_tmp);
\n
"
" cube [1][1][1] = *(dockpars_fgrids + cube_111 + mul_tmp);
\n
"
"
\n
"
"#else
\n
"
" cube [0][0][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_low, x_low);
\n
"
" cube [1][0][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_low, x_high);
\n
"
" cube [0][1][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_high, x_low);
\n
"
" cube [1][1][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_high, x_high);
\n
"
" cube [0][0][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_low, x_low);
\n
"
" cube [1][0][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_low, x_high);
\n
"
" cube [0][1][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_high, x_low);
\n
"
" cube [1][1][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_high, x_high);
\n
"
"#endif
\n
"
"
\n
"
" //calculating electrosatic energy
\n
"
" partial_energies[get_local_id(0)] += q * TRILININTERPOL(cube, weights);
\n
"
"
\n
"
" //capturing desolvation values
\n
"
" atom1_typeid = dockpars_num_of_atypes+1;
\n
"
"
\n
"
"#if defined (IMPROVE_GRID)
\n
"
" mul_tmp = atom1_typeid*g3;
\n
"
" cube [0][0][0] = *(dockpars_fgrids + cube_000 + mul_tmp);
\n
"
" cube [1][0][0] = *(dockpars_fgrids + cube_100 + mul_tmp);
\n
"
" cube [0][1][0] = *(dockpars_fgrids + cube_010 + mul_tmp);
\n
"
" cube [1][1][0] = *(dockpars_fgrids + cube_110 + mul_tmp);
\n
"
" cube [0][0][1] = *(dockpars_fgrids + cube_001 + mul_tmp);
\n
"
" cube [1][0][1] = *(dockpars_fgrids + cube_101 + mul_tmp);
\n
"
" cube [0][1][1] = *(dockpars_fgrids + cube_011 + mul_tmp);
\n
"
" cube [1][1][1] = *(dockpars_fgrids + cube_111 + mul_tmp);
\n
"
"
\n
"
"#else
\n
"
" cube [0][0][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_low, x_low);
\n
"
" cube [1][0][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_low, x_high);
\n
"
" cube [0][1][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_high, x_low);
\n
"
" cube [1][1][0] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_low, y_high, x_high);
\n
"
" cube [0][0][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_low, x_low);
\n
"
" cube [1][0][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_low, x_high);
\n
"
" cube [0][1][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_high, x_low);
\n
"
" cube [1][1][1] = GETGRIDVALUE(dockpars_fgrids, dockpars_gridsize_x,
\n
"
" dockpars_gridsize_y, dockpars_gridsize_z,
\n
"
" atom1_typeid, z_high, y_high, x_high);
\n
"
"#endif
\n
"
"
\n
"
" //calculating desolvation energy
\n
"
" partial_energies[get_local_id(0)] += fabs(q) * TRILININTERPOL(cube, weights);
\n
"
" }
\n
"
"
\n
"
" } // End atom1_id for-loop
\n
"
"
\n
"
" // In paper: intermolecular and internal energy calculation
\n
"
" // are independent from each other, -> NO BARRIER NEEDED
\n
"
" // but require different operations,
\n
"
" // thus, they can be executed only sequentially on the GPU.
\n
"
"
\n
"
" // ================================================
\n
"
" // CALCULATE INTRAMOLECULAR ENERGY
\n
"
" // ================================================
\n
"
" for (contributor_counter = get_local_id(0);
\n
"
" contributor_counter < dockpars_num_of_intraE_contributors;
\n
"
" contributor_counter +=NUM_OF_THREADS_PER_BLOCK)
\n
"
" {
\n
"
" //getting atom IDs
\n
"
" atom1_id = intraE_contributors_const[3*contributor_counter];
\n
"
" atom2_id = intraE_contributors_const[3*contributor_counter+1];
\n
"
"
\n
"
" //calculating address of first atom's coordinates
\n
"
" subx = calc_coords_x[atom1_id];
\n
"
" suby = calc_coords_y[atom1_id];
\n
"
" subz = calc_coords_z[atom1_id];
\n
"
"
\n
"
" //calculating address of second atom's coordinates
\n
"
" subx -= calc_coords_x[atom2_id];
\n
"
" suby -= calc_coords_y[atom2_id];
\n
"
" subz -= calc_coords_z[atom2_id];
\n
"
"
\n
"
" //calculating distance (distance_leo)
\n
"
"#if defined (NATIVE_PRECISION)
\n
"
" distance_leo = native_sqrt(subx*subx + suby*suby + subz*subz)*dockpars_grid_spacing;
\n
"
"#elif defined (HALF_PRECISION)
\n
"
" distance_leo = half_sqrt(subx*subx + suby*suby + subz*subz)*dockpars_grid_spacing;
\n
"
"#else // Full precision
\n
"
" distance_leo = sqrt(subx*subx + suby*suby + subz*subz)*dockpars_grid_spacing;
\n
"
"#endif
\n
"
"
\n
"
" if (distance_leo < 1.0f)
\n
"
" distance_leo = 1.0f;
\n
"
"
\n
"
" //calculating energy contributions
\n
"
" if ((distance_leo < 8.0f) && (distance_leo < 20.48f))
\n
"
" {
\n
"
" //getting type IDs
\n
"
" atom1_typeid = atom_types_const[atom1_id];
\n
"
" atom2_typeid = atom_types_const[atom2_id];
\n
"
"
\n
"
" //calculating van der Waals / hydrogen bond term
\n
"
"#if defined (NATIVE_PRECISION)
\n
"
" partial_energies[get_local_id(0)] += native_divide(VWpars_AC_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],native_powr(distance_leo,12));
\n
"
"#elif defined (HALF_PRECISION)
\n
"