/* OCLADock, an OpenCL implementation of AutoDock 4.2 running a Lamarckian Genetic Algorithm Copyright (C) 2017 TU Darmstadt, Embedded Systems and Applications Group, Germany. All rights reserved. AutoDock is a Trade Mark of the Scripps Research Institute. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /* #include "calcenergy_basic.h" */ // All related pragmas are in defines.h (accesible by host and device code) // The GPU device function calculates the energy's gradient (forces or derivatives) // of the entity described by genotype, dockpars and the ligand-data // arrays in constant memory and returns it in the "gradient_genotype" parameter. // The parameter "run_id" has to be equal to the ID of the run // whose population includes the current entity (which can be determined with get_group_id(0)), // since this determines which reference orientation should be used. //#define DEBUG_GRAD_TRANSLATION_GENES //#define DEBUG_GRAD_ROTATION_GENES //#define DEBUG_GRAD_TORSION_GENES void map_priv_angle(float* angle) // The GPU device function maps // the input parameter to the interval 0...360 // (supposing that it is an angle). { while (*angle >= 360.0f) { *angle -= 360.0f; } while (*angle < 0.0f) { *angle += 360.0f; } } // Atomic operations used in gradients of intra contributors. // Only atomic_cmpxchg() works on floats. // So for atomic add on floats, this link was used: // https://streamhpc.com/blog/2016-02-09/atomic-operations-for-floats-in-opencl-improved/ void atomicAdd_g_f(volatile __local float *addr, float val) { union{ unsigned int u32; float f32; } next, expected, current; current.f32 = *addr; do{ expected.f32 = current.f32; next.f32 = expected.f32 + val; current.u32 = atomic_cmpxchg( (volatile __local unsigned int *)addr, expected.u32, next.u32); } while( current.u32 != expected.u32 ); } void atomicSub_g_f(volatile __local float *addr, float val) { union{ unsigned int u32; float f32; } next, expected, current; current.f32 = *addr; do{ expected.f32 = current.f32; next.f32 = expected.f32 - val; current.u32 = atomic_cmpxchg( (volatile __local unsigned int *)addr, expected.u32, next.u32); } while( current.u32 != expected.u32 ); } void gpu_calc_gradient( int dockpars_rotbondlist_length, char dockpars_num_of_atoms, char dockpars_gridsize_x, char dockpars_gridsize_y, char dockpars_gridsize_z, // g1 = gridsize_x uint dockpars_gridsize_x_times_y, // g2 = gridsize_x * gridsize_y uint dockpars_gridsize_x_times_y_times_z, // g3 = gridsize_x * gridsize_y * gridsize_z __global const float* restrict dockpars_fgrids, // This is too large to be allocated in __constant char dockpars_num_of_atypes, int dockpars_num_of_intraE_contributors, float dockpars_grid_spacing, float dockpars_coeff_elec, float dockpars_qasp, float dockpars_coeff_desolv, // Some OpenCL compilers don't allow declaring // local variables within non-kernel functions. // These local variables must be declared in a kernel, // and then passed to non-kernel functions. __local float* genotype, __local float* energy, __local int* run_id, __local float* calc_coords_x, __local float* calc_coords_y, __local float* calc_coords_z, __constant float* atom_charges_const, __constant char* atom_types_const, __constant char* intraE_contributors_const, float dockpars_smooth, __constant float* reqm, __constant float* reqm_hbond, __constant uint* atom1_types_reqm, __constant uint* atom2_types_reqm, __constant float* VWpars_AC_const, __constant float* VWpars_BD_const, __constant float* dspars_S_const, __constant float* dspars_V_const, __constant int* rotlist_const, __constant float* ref_coords_x_const, __constant float* ref_coords_y_const, __constant float* ref_coords_z_const, __constant float* rotbonds_moving_vectors_const, __constant float* rotbonds_unit_vectors_const, __constant float* ref_orientation_quats_const, __constant int* rotbonds_const, __constant int* rotbonds_atoms_const, __constant int* num_rotating_atoms_per_rotbond_const , __constant float* angle_const, __constant float* dependence_on_theta_const, __constant float* dependence_on_rotangle_const // Gradient-related arguments // Calculate gradients (forces) for intermolecular energy // Derived from autodockdev/maps.py // "is_enabled_gradient_calc": enables gradient calculation. // In Genetic-Generation: no need for gradients // In Gradient-Minimizer: must calculate gradients , int dockpars_num_of_genes, __local float* gradient_inter_x, __local float* gradient_inter_y, __local float* gradient_inter_z, __local float* gradient_intra_x, __local float* gradient_intra_y, __local float* gradient_intra_z, __local float* gradient_genotype ) { // Initializing gradients (forces) // Derived from autodockdev/maps.py for (uint atom_id = get_local_id(0); atom_id < dockpars_num_of_atoms; atom_id+= NUM_OF_THREADS_PER_BLOCK) { // Intermolecular gradients gradient_inter_x[atom_id] = 0.0f; gradient_inter_y[atom_id] = 0.0f; gradient_inter_z[atom_id] = 0.0f; // Intramolecular gradients gradient_intra_x[atom_id] = 0.0f; gradient_intra_y[atom_id] = 0.0f; gradient_intra_z[atom_id] = 0.0f; } // Initializing gradient genotypes for (uint gene_cnt = get_local_id(0); gene_cnt < dockpars_num_of_genes; gene_cnt+= NUM_OF_THREADS_PER_BLOCK) { gradient_genotype[gene_cnt] = 0.0f; } barrier(CLK_LOCAL_MEM_FENCE); // Convert orientation genes from sex. to radians float phi = genotype[3] * DEG_TO_RAD; float theta = genotype[4] * DEG_TO_RAD; float genrotangle = genotype[5] * DEG_TO_RAD; float genrot_unitvec [3]; float sin_angle = native_sin(theta); genrot_unitvec [0] = sin_angle*native_cos(phi); genrot_unitvec [1] = sin_angle*native_sin(phi); genrot_unitvec [2] = native_cos(theta); uchar g1 = dockpars_gridsize_x; uint g2 = dockpars_gridsize_x_times_y; uint g3 = dockpars_gridsize_x_times_y_times_z; // ================================================ // CALCULATING ATOMIC POSITIONS AFTER ROTATIONS // ================================================ for (uint rotation_counter = get_local_id(0); rotation_counter < dockpars_rotbondlist_length; rotation_counter+=NUM_OF_THREADS_PER_BLOCK) { int rotation_list_element = rotlist_const[rotation_counter]; if ((rotation_list_element & RLIST_DUMMY_MASK) == 0) // If not dummy rotation { uint atom_id = rotation_list_element & RLIST_ATOMID_MASK; // Capturing atom coordinates float atom_to_rotate[3]; if ((rotation_list_element & RLIST_FIRSTROT_MASK) != 0) // If first rotation of this atom { atom_to_rotate[0] = ref_coords_x_const[atom_id]; atom_to_rotate[1] = ref_coords_y_const[atom_id]; atom_to_rotate[2] = ref_coords_z_const[atom_id]; } else { atom_to_rotate[0] = calc_coords_x[atom_id]; atom_to_rotate[1] = calc_coords_y[atom_id]; atom_to_rotate[2] = calc_coords_z[atom_id]; } // Capturing rotation vectors and angle float rotation_unitvec[3]; float rotation_movingvec[3]; float rotation_angle; float quatrot_left_x, quatrot_left_y, quatrot_left_z, quatrot_left_q; float quatrot_temp_x, quatrot_temp_y, quatrot_temp_z, quatrot_temp_q; if ((rotation_list_element & RLIST_GENROT_MASK) != 0) // If general rotation { rotation_unitvec[0] = genrot_unitvec[0]; rotation_unitvec[1] = genrot_unitvec[1]; rotation_unitvec[2] = genrot_unitvec[2]; rotation_movingvec[0] = genotype[0]; rotation_movingvec[1] = genotype[1]; rotation_movingvec[2] = genotype[2]; rotation_angle = genrotangle; } else // If rotating around rotatable bond { uint rotbond_id = (rotation_list_element & RLIST_RBONDID_MASK) >> RLIST_RBONDID_SHIFT; rotation_unitvec[0] = rotbonds_unit_vectors_const[3*rotbond_id]; rotation_unitvec[1] = rotbonds_unit_vectors_const[3*rotbond_id+1]; rotation_unitvec[2] = rotbonds_unit_vectors_const[3*rotbond_id+2]; rotation_movingvec[0] = rotbonds_moving_vectors_const[3*rotbond_id]; rotation_movingvec[1] = rotbonds_moving_vectors_const[3*rotbond_id+1]; rotation_movingvec[2] = rotbonds_moving_vectors_const[3*rotbond_id+2]; float rotation_angle = genotype[6+rotbond_id]*DEG_TO_RAD; // Performing additionally the first movement which // is needed only if rotating around rotatable bond atom_to_rotate[0] -= rotation_movingvec[0]; atom_to_rotate[1] -= rotation_movingvec[1]; atom_to_rotate[2] -= rotation_movingvec[2]; } // Transforming orientation and torsion angles into quaternions rotation_angle = rotation_angle * 0.5f; float sin_angle = native_sin(rotation_angle); quatrot_left_q = native_cos(rotation_angle); quatrot_left_x = sin_angle*rotation_unitvec[0]; quatrot_left_y = sin_angle*rotation_unitvec[1]; quatrot_left_z = sin_angle*rotation_unitvec[2]; // Performing rotation if ((rotation_list_element & RLIST_GENROT_MASK) != 0) // If general rotation, // two rotations should be performed // (multiplying the quaternions) { // Calculating quatrot_left*ref_orientation_quats_const, // which means that reference orientation rotation is the first quatrot_temp_q = quatrot_left_q; quatrot_temp_x = quatrot_left_x; quatrot_temp_y = quatrot_left_y; quatrot_temp_z = quatrot_left_z; quatrot_left_q = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)]- quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+1]- quatrot_temp_y*ref_orientation_quats_const[4*(*run_id)+2]- quatrot_temp_z*ref_orientation_quats_const[4*(*run_id)+3]; quatrot_left_x = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+1]+ ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_x+ quatrot_temp_y*ref_orientation_quats_const[4*(*run_id)+3]- ref_orientation_quats_const[4*(*run_id)+2]*quatrot_temp_z; quatrot_left_y = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+2]+ ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_y+ ref_orientation_quats_const[4*(*run_id)+1]*quatrot_temp_z- quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+3]; quatrot_left_z = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+3]+ ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_z+ quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+2]- ref_orientation_quats_const[4*(*run_id)+1]*quatrot_temp_y; } quatrot_temp_q = 0 - quatrot_left_x*atom_to_rotate [0] - quatrot_left_y*atom_to_rotate [1] - quatrot_left_z*atom_to_rotate [2]; quatrot_temp_x = quatrot_left_q*atom_to_rotate [0] + quatrot_left_y*atom_to_rotate [2] - quatrot_left_z*atom_to_rotate [1]; quatrot_temp_y = quatrot_left_q*atom_to_rotate [1] - quatrot_left_x*atom_to_rotate [2] + quatrot_left_z*atom_to_rotate [0]; quatrot_temp_z = quatrot_left_q*atom_to_rotate [2] + quatrot_left_x*atom_to_rotate [1] - quatrot_left_y*atom_to_rotate [0]; atom_to_rotate [0] = 0 - quatrot_temp_q*quatrot_left_x + quatrot_temp_x*quatrot_left_q - quatrot_temp_y*quatrot_left_z + quatrot_temp_z*quatrot_left_y; atom_to_rotate [1] = 0 - quatrot_temp_q*quatrot_left_y + quatrot_temp_x*quatrot_left_z + quatrot_temp_y*quatrot_left_q - quatrot_temp_z*quatrot_left_x; atom_to_rotate [2] = 0 - quatrot_temp_q*quatrot_left_z - quatrot_temp_x*quatrot_left_y + quatrot_temp_y*quatrot_left_x + quatrot_temp_z*quatrot_left_q; // Performing final movement and storing values calc_coords_x[atom_id] = atom_to_rotate [0] + rotation_movingvec[0]; calc_coords_y[atom_id] = atom_to_rotate [1] + rotation_movingvec[1]; calc_coords_z[atom_id] = atom_to_rotate [2] + rotation_movingvec[2]; } // End if-statement not dummy rotation barrier(CLK_LOCAL_MEM_FENCE); } // End rotation_counter for-loop // ================================================ // CALCULATING INTERMOLECULAR GRADIENTS // ================================================ for (uint atom_id = get_local_id(0); atom_id < dockpars_num_of_atoms; atom_id+= NUM_OF_THREADS_PER_BLOCK) { uint atom_typeid = atom_types_const[atom_id]; float x = calc_coords_x[atom_id]; float y = calc_coords_y[atom_id]; float z = calc_coords_z[atom_id]; float q = atom_charges_const[atom_id]; if ((x < 0) || (y < 0) || (z < 0) || (x >= dockpars_gridsize_x-1) || (y >= dockpars_gridsize_y-1) || (z >= dockpars_gridsize_z-1)){ // Setting gradients (forces) penalties. // These are valid as long as they are high gradient_inter_x[atom_id] += 16777216.0f; gradient_inter_y[atom_id] += 16777216.0f; gradient_inter_z[atom_id] += 16777216.0f; } else { // Getting coordinates int x_low = (int)floor(x); int y_low = (int)floor(y); int z_low = (int)floor(z); int x_high = (int)ceil(x); int y_high = (int)ceil(y); int z_high = (int)ceil(z); float dx = x - x_low; float dy = y - y_low; float dz = z - z_low; //printf("%-15s %-5u %-10.8f %-10.8f %-10.8f\n", "dx,dy,dz", atom_id, dx, dy, dz); // Calculating interpolation weights float weights[2][2][2]; weights [0][0][0] = (1-dx)*(1-dy)*(1-dz); weights [1][0][0] = dx*(1-dy)*(1-dz); weights [0][1][0] = (1-dx)*dy*(1-dz); weights [1][1][0] = dx*dy*(1-dz); weights [0][0][1] = (1-dx)*(1-dy)*dz; weights [1][0][1] = dx*(1-dy)*dz; weights [0][1][1] = (1-dx)*dy*dz; weights [1][1][1] = dx*dy*dz; // Capturing affinity values uint ylow_times_g1 = y_low*g1; uint yhigh_times_g1 = y_high*g1; uint zlow_times_g2 = z_low*g2; uint zhigh_times_g2 = z_high*g2; // Grid offset uint offset_cube_000 = x_low + ylow_times_g1 + zlow_times_g2; uint offset_cube_100 = x_high + ylow_times_g1 + zlow_times_g2; uint offset_cube_010 = x_low + yhigh_times_g1 + zlow_times_g2; uint offset_cube_110 = x_high + yhigh_times_g1 + zlow_times_g2; uint offset_cube_001 = x_low + ylow_times_g1 + zhigh_times_g2; uint offset_cube_101 = x_high + ylow_times_g1 + zhigh_times_g2; uint offset_cube_011 = x_low + yhigh_times_g1 + zhigh_times_g2; uint offset_cube_111 = x_high + yhigh_times_g1 + zhigh_times_g2; uint mul_tmp = atom_typeid*g3; float cube[2][2][2]; cube [0][0][0] = *(dockpars_fgrids + offset_cube_000 + mul_tmp); cube [1][0][0] = *(dockpars_fgrids + offset_cube_100 + mul_tmp); cube [0][1][0] = *(dockpars_fgrids + offset_cube_010 + mul_tmp); cube [1][1][0] = *(dockpars_fgrids + offset_cube_110 + mul_tmp); cube [0][0][1] = *(dockpars_fgrids + offset_cube_001 + mul_tmp); cube [1][0][1] = *(dockpars_fgrids + offset_cube_101 + mul_tmp); cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp); cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp); // ------------------------------------------------------------------- // Deltas dx, dy, dz are already normalized // (by host/src/getparameters.cpp) in OCLaDock. // The correspondance between vertices in xyz axes is: // 0, 1, 2, 3, 4, 5, 6, 7 and 000, 100, 010, 001, 101, 110, 011, 111 // ------------------------------------------------------------------- /* deltas: (x-x0)/(x1-x0), (y-y0... vertices: (000, 100, 010, 001, 101, 110, 011, 111) Z ' 3 - - - - 6 /. /| 4 - - - - 7 | | ' | | | 0 - - - + 2 -- Y '/ |/ 1 - - - - 5 / X */ // Intermediate values for vectors in x-direction float x10, x52, x43, x76; float vx_z0, vx_z1; // Intermediate values for vectors in y-direction float y20, y51, y63, y74; float vy_z0, vy_z1; // Intermediate values for vectors in z-direction float z30, z41, z62, z75; float vz_y0, vz_y1; // ------------------------------------------------------------------- // Calculating gradients (forces) corresponding to // "atype" intermolecular energy // Derived from autodockdev/maps.py // ------------------------------------------------------------------- // Vector in x-direction x10 = cube [1][0][0] - cube [0][0][0]; // z = 0 x52 = cube [1][1][0] - cube [0][1][0]; // z = 0 x43 = cube [1][0][1] - cube [0][0][1]; // z = 1 x76 = cube [1][1][1] - cube [0][1][1]; // z = 1 vx_z0 = (1 - dy) * x10 + dy * x52; // z = 0 vx_z1 = (1 - dy) * x43 + dy * x76; // z = 1 gradient_inter_x[atom_id] += (1 - dz) * vx_z0 + dz * vx_z1; // Vector in y-direction y20 = cube[0][1][0] - cube [0][0][0]; // z = 0 y51 = cube[1][1][0] - cube [1][0][0]; // z = 0 y63 = cube[0][1][1] - cube [0][0][1]; // z = 1 y74 = cube[1][1][1] - cube [1][0][1]; // z = 1 vy_z0 = (1 - dx) * y20 + dx * y51; // z = 0 vy_z1 = (1 - dx) * y63 + dx * y74; // z = 1 gradient_inter_y[atom_id] += (1 - dz) * vy_z0 + dz * vy_z1; // Vectors in z-direction z30 = cube [0][0][1] - cube [0][0][0]; // y = 0 z41 = cube [1][0][1] - cube [1][0][0]; // y = 0 z62 = cube [0][1][1] - cube [0][1][0]; // y = 1 z75 = cube [1][1][1] - cube [1][1][0]; // y = 1 vz_y0 = (1 - dx) * z30 + dx * z41; // y = 0 vz_y1 = (1 - dx) * z62 + dx * z75; // y = 1 gradient_inter_z[atom_id] += (1 - dy) * vz_y0 + dy * vz_y1; //printf("%-15s %-3u %-10.8f %-10.8f %-10.8f %-10.8f %-10.8f %-10.8f\n", "atom aff", atom_id, vx_z0, vx_z1, vy_z0, vy_z1, vz_y0, vz_y1); // ------------------------------------------------------------------- // Calculating gradients (forces) corresponding to // "elec" intermolecular energy // Derived from autodockdev/maps.py // ------------------------------------------------------------------- // Capturing electrostatic values atom_typeid = dockpars_num_of_atypes; mul_tmp = atom_typeid*g3; cube [0][0][0] = *(dockpars_fgrids + offset_cube_000 + mul_tmp); cube [1][0][0] = *(dockpars_fgrids + offset_cube_100 + mul_tmp); cube [0][1][0] = *(dockpars_fgrids + offset_cube_010 + mul_tmp); cube [1][1][0] = *(dockpars_fgrids + offset_cube_110 + mul_tmp); cube [0][0][1] = *(dockpars_fgrids + offset_cube_001 + mul_tmp); cube [1][0][1] = *(dockpars_fgrids + offset_cube_101 + mul_tmp); cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp); cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp); // Vector in x-direction x10 = cube [1][0][0] - cube [0][0][0]; // z = 0 x52 = cube [1][1][0] - cube [0][1][0]; // z = 0 x43 = cube [1][0][1] - cube [0][0][1]; // z = 1 x76 = cube [1][1][1] - cube [0][1][1]; // z = 1 vx_z0 = (1 - dy) * x10 + dy * x52; // z = 0 vx_z1 = (1 - dy) * x43 + dy * x76; // z = 1 gradient_inter_x[atom_id] += q * ((1 - dz) * vx_z0 + dz * vx_z1); // Vector in y-direction y20 = cube[0][1][0] - cube [0][0][0]; // z = 0 y51 = cube[1][1][0] - cube [1][0][0]; // z = 0 y63 = cube[0][1][1] - cube [0][0][1]; // z = 1 y74 = cube[1][1][1] - cube [1][0][1]; // z = 1 vy_z0 = (1 - dx) * y20 + dx * y51; // z = 0 vy_z1 = (1 - dx) * y63 + dx * y74; // z = 1 gradient_inter_y[atom_id] += q *((1 - dz) * vy_z0 + dz * vy_z1); // Vectors in z-direction z30 = cube [0][0][1] - cube [0][0][0]; // y = 0 z41 = cube [1][0][1] - cube [1][0][0]; // y = 0 z62 = cube [0][1][1] - cube [0][1][0]; // y = 1 z75 = cube [1][1][1] - cube [1][1][0]; // y = 1 vz_y0 = (1 - dx) * z30 + dx * z41; // y = 0 vz_y1 = (1 - dx) * z62 + dx * z75; // y = 1 gradient_inter_z[atom_id] += q *((1 - dy) * vz_y0 + dy * vz_y1); //printf("%-15s %-3u %-10.8f %-10.8f %-10.8f %-10.8f %-10.8f %-10.8f\n", "elec", atom_id, vx_z0, vx_z1, vy_z0, vy_z1, vz_y0, vz_y1); // ------------------------------------------------------------------- // Calculating gradients (forces) corresponding to // "dsol" intermolecular energy // Derived from autodockdev/maps.py // ------------------------------------------------------------------- // Capturing desolvation values atom_typeid = dockpars_num_of_atypes+1; mul_tmp = atom_typeid*g3; cube [0][0][0] = *(dockpars_fgrids + offset_cube_000 + mul_tmp); cube [1][0][0] = *(dockpars_fgrids + offset_cube_100 + mul_tmp); cube [0][1][0] = *(dockpars_fgrids + offset_cube_010 + mul_tmp); cube [1][1][0] = *(dockpars_fgrids + offset_cube_110 + mul_tmp); cube [0][0][1] = *(dockpars_fgrids + offset_cube_001 + mul_tmp); cube [1][0][1] = *(dockpars_fgrids + offset_cube_101 + mul_tmp); cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp); cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp); // Vector in x-direction x10 = cube [1][0][0] - cube [0][0][0]; // z = 0 x52 = cube [1][1][0] - cube [0][1][0]; // z = 0 x43 = cube [1][0][1] - cube [0][0][1]; // z = 1 x76 = cube [1][1][1] - cube [0][1][1]; // z = 1 vx_z0 = (1 - dy) * x10 + dy * x52; // z = 0 vx_z1 = (1 - dy) * x43 + dy * x76; // z = 1 gradient_inter_x[atom_id] += fabs(q) * ((1 - dz) * vx_z0 + dz * vx_z1); // Vector in y-direction y20 = cube[0][1][0] - cube [0][0][0]; // z = 0 y51 = cube[1][1][0] - cube [1][0][0]; // z = 0 y63 = cube[0][1][1] - cube [0][0][1]; // z = 1 y74 = cube[1][1][1] - cube [1][0][1]; // z = 1 vy_z0 = (1 - dx) * y20 + dx * y51; // z = 0 vy_z1 = (1 - dx) * y63 + dx * y74; // z = 1 gradient_inter_y[atom_id] += fabs(q) *((1 - dz) * vy_z0 + dz * vy_z1); // Vectors in z-direction z30 = cube [0][0][1] - cube [0][0][0]; // y = 0 z41 = cube [1][0][1] - cube [1][0][0]; // y = 0 z62 = cube [0][1][1] - cube [0][1][0]; // y = 1 z75 = cube [1][1][1] - cube [1][1][0]; // y = 1 vz_y0 = (1 - dx) * z30 + dx * z41; // y = 0 vz_y1 = (1 - dx) * z62 + dx * z75; // y = 1 gradient_inter_z[atom_id] += fabs(q) *((1 - dy) * vz_y0 + dy * vz_y1); //printf("%-15s %-3u %-10.8f %-10.8f %-10.8f %-10.8f %-10.8f %-10.8f\n", "desol", atom_id, vx_z0, vx_z1, vy_z0, vy_z1, vz_y0, vz_y1); // ------------------------------------------------------------------- } } // End atom_id for-loop (INTERMOLECULAR ENERGY) // Inter- and intra-molecular energy calculation // are independent from each other, so NO barrier is needed here. // As these two require different operations, // they can be executed only sequentially on the GPU. // ================================================ // CALCULATING INTRAMOLECULAR GRADIENTS // ================================================ for (uint contributor_counter = get_local_id(0); contributor_counter < dockpars_num_of_intraE_contributors; contributor_counter+= NUM_OF_THREADS_PER_BLOCK) { // Storing in a private variable // the gradient contribution of each contributing atomic pair float priv_gradient_per_intracontributor= 0.0f; // Getting atom IDs uint atom1_id = intraE_contributors_const[3*contributor_counter]; uint atom2_id = intraE_contributors_const[3*contributor_counter+1]; /* printf ("%-5u %-5u %-5u\n", contributor_counter, atom1_id, atom2_id); */ // Calculating vector components of vector going // from first atom's to second atom's coordinates float subx = calc_coords_x[atom1_id] - calc_coords_x[atom2_id]; float suby = calc_coords_y[atom1_id] - calc_coords_y[atom2_id]; float subz = calc_coords_z[atom1_id] - calc_coords_z[atom2_id]; // Calculating atomic distance float dist = native_sqrt(subx*subx + suby*suby + subz*subz); float atomic_distance = dist*dockpars_grid_spacing; // Getting type IDs uint atom1_typeid = atom_types_const[atom1_id]; uint atom2_typeid = atom_types_const[atom2_id]; uint atom1_type_vdw_hb = atom1_types_reqm [atom1_typeid]; uint atom2_type_vdw_hb = atom2_types_reqm [atom2_typeid]; //printf ("%-5u %-5u %-5u\n", contributor_counter, atom1_id, atom2_id); // Getting optimum pair distance (opt_distance) from reqm and reqm_hbond // reqm: equilibrium internuclear separation // (sum of the vdW radii of two like atoms (A)) in the case of vdW // reqm_hbond: equilibrium internuclear separation // (sum of the vdW radii of two like atoms (A)) in the case of hbond float opt_distance; if (intraE_contributors_const[3*contributor_counter+2] == 1) //H-bond { opt_distance = reqm_hbond [atom1_type_vdw_hb] + reqm_hbond [atom2_type_vdw_hb]; } else //van der Waals { opt_distance = 0.5f*(reqm [atom1_type_vdw_hb] + reqm [atom2_type_vdw_hb]); } // Getting smoothed distance // smoothed_distance = function(atomic_distance, opt_distance) float smoothed_distance; float delta_distance = 0.5f*dockpars_smooth; if (atomic_distance <= (opt_distance - delta_distance)) { smoothed_distance = atomic_distance + delta_distance; } else if (atomic_distance < (opt_distance + delta_distance)) { smoothed_distance = opt_distance; } else { // else if (atomic_distance >= (opt_distance + delta_distance)) smoothed_distance = atomic_distance - delta_distance; } // Calculating gradient contributions // Cuttoff1: internuclear-distance at 8A only for vdw and hbond. if (atomic_distance < 8.0f) { // Calculating van der Waals / hydrogen bond term priv_gradient_per_intracontributor += native_divide (-12*VWpars_AC_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid], native_powr(smoothed_distance/*atomic_distance*/, 13) ); if (intraE_contributors_const[3*contributor_counter+2] == 1) { //H-bond priv_gradient_per_intracontributor += native_divide (10*VWpars_BD_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid], native_powr(smoothed_distance/*atomic_distance*/, 11) ); } else { //van der Waals priv_gradient_per_intracontributor += native_divide (6*VWpars_BD_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid], native_powr(smoothed_distance/*atomic_distance*/, 7) ); } } // if cuttoff1 - internuclear-distance at 8A // Calculating energy contributions // Cuttoff2: internuclear-distance at 20.48A only for el and sol. if (atomic_distance < 20.48f) { // Calculating electrostatic term // http://www.wolframalpha.com/input/?i=1%2F(x*(A%2B(B%2F(1%2BK*exp(-h*B*x))))) float upper = DIEL_A*native_powr(native_exp(DIEL_B_TIMES_H*atomic_distance) + DIEL_K, 2) + (DIEL_B)*native_exp(DIEL_B_TIMES_H*atomic_distance)*(DIEL_B_TIMES_H_TIMES_K*atomic_distance + native_exp(DIEL_B_TIMES_H*atomic_distance) + DIEL_K); float lower = native_powr(atomic_distance, 2) * native_powr(DIEL_A * (native_exp(DIEL_B_TIMES_H*atomic_distance) + DIEL_K) + DIEL_B * native_exp(DIEL_B_TIMES_H*atomic_distance), 2); priv_gradient_per_intracontributor += -dockpars_coeff_elec * atom_charges_const[atom1_id] * atom_charges_const[atom2_id] * native_divide (upper, lower); // Calculating desolvation term priv_gradient_per_intracontributor += ( (dspars_S_const[atom1_typeid] + dockpars_qasp*fabs(atom_charges_const[atom1_id])) * dspars_V_const[atom2_typeid] + (dspars_S_const[atom2_typeid] + dockpars_qasp*fabs(atom_charges_const[atom2_id])) * dspars_V_const[atom1_typeid] ) * dockpars_coeff_desolv * -0.07716049382716049 * atomic_distance * native_exp(-0.038580246913580245*native_powr(atomic_distance, 2)); } // if cuttoff2 - internuclear-distance at 20.48A // Decomposing "priv_gradient_per_intracontributor" // into the contribution of each atom of the pair float subx_div_dist = native_divide(subx, dist); float suby_div_dist = native_divide(suby, dist); float subz_div_dist = native_divide(subz, dist); float priv_intra_gradient_x = priv_gradient_per_intracontributor * subx_div_dist; float priv_intra_gradient_y = priv_gradient_per_intracontributor * suby_div_dist; float priv_intra_gradient_z = priv_gradient_per_intracontributor * subz_div_dist; // Calculating gradients in xyz components. // Gradients for both atoms in a single contributor pair // have the same magnitude, but opposite directions atomicSub_g_f(&gradient_intra_x[atom1_id], priv_intra_gradient_x); atomicSub_g_f(&gradient_intra_y[atom1_id], priv_intra_gradient_y); atomicSub_g_f(&gradient_intra_z[atom1_id], priv_intra_gradient_z); atomicAdd_g_f(&gradient_intra_x[atom2_id], priv_intra_gradient_x); atomicAdd_g_f(&gradient_intra_y[atom2_id], priv_intra_gradient_y); atomicAdd_g_f(&gradient_intra_z[atom2_id], priv_intra_gradient_z); } // End contributor_counter for-loop (INTRAMOLECULAR ENERGY) // Commented to remove unnecessary local storage which was // required by gradient_per_intracontributor[MAX_INTRAE_CONTRIBUTORS] /* barrier(CLK_LOCAL_MEM_FENCE); // Accumulating gradients from "gradient_per_intracontributor" for each each if (get_local_id(0) == 0) { for (uint contributor_counter = 0; contributor_counter < dockpars_num_of_intraE_contributors; contributor_counter ++) { // Getting atom IDs uint atom1_id = intraE_contributors_const[3*contributor_counter]; uint atom2_id = intraE_contributors_const[3*contributor_counter+1]; // Calculating xyz distances in Angstroms of vector // that goes from "atom1_id"-to-"atom2_id" float subx = (calc_coords_x[atom2_id] - calc_coords_x[atom1_id]); float suby = (calc_coords_y[atom2_id] - calc_coords_y[atom1_id]); float subz = (calc_coords_z[atom2_id] - calc_coords_z[atom1_id]); float dist = native_sqrt(subx*subx + suby*suby + subz*subz); float subx_div_dist = native_divide(subx, dist); float suby_div_dist = native_divide(suby, dist); float subz_div_dist = native_divide(subz, dist); // Calculating gradients in xyz components. // Gradients for both atoms in a single contributor pair // have the same magnitude, but opposite directions gradient_intra_x[atom1_id] -= gradient_per_intracontributor[contributor_counter] * subx_div_dist; gradient_intra_y[atom1_id] -= gradient_per_intracontributor[contributor_counter] * suby_div_dist; gradient_intra_z[atom1_id] -= gradient_per_intracontributor[contributor_counter] * subz_div_dist; gradient_intra_x[atom2_id] += gradient_per_intracontributor[contributor_counter] * subx_div_dist; gradient_intra_y[atom2_id] += gradient_per_intracontributor[contributor_counter] * suby_div_dist; gradient_intra_z[atom2_id] += gradient_per_intracontributor[contributor_counter] * subz_div_dist; //printf("%-20s %-10u %-5u %-5u %-10.8f\n", "grad_intracontrib", contributor_counter, atom1_id, atom2_id, gradient_per_intracontributor[contributor_counter]); } } */ barrier(CLK_LOCAL_MEM_FENCE); // Accumulating inter- and intramolecular gradients for (uint atom_cnt = get_local_id(0); atom_cnt < dockpars_num_of_atoms; atom_cnt+= NUM_OF_THREADS_PER_BLOCK) { // Grid gradients were calculated in the grid space, // so they have to be put back in Angstrom. // Intramolecular gradients were already in Angstrom, // so no scaling for them is required. gradient_inter_x[atom_cnt] = native_divide(gradient_inter_x[atom_cnt], dockpars_grid_spacing); gradient_inter_y[atom_cnt] = native_divide(gradient_inter_y[atom_cnt], dockpars_grid_spacing); gradient_inter_z[atom_cnt] = native_divide(gradient_inter_z[atom_cnt], dockpars_grid_spacing); // Re-using "gradient_inter_*" for total gradient (inter+intra) gradient_inter_x[atom_cnt] += gradient_intra_x[atom_cnt]; gradient_inter_y[atom_cnt] += gradient_intra_y[atom_cnt]; gradient_inter_z[atom_cnt] += gradient_intra_z[atom_cnt]; //printf("%-15s %-5u %-10.8f %-10.8f %-10.8f\n", "grad_grid", atom_cnt, gradient_inter_x[atom_cnt], gradient_inter_y[atom_cnt], gradient_inter_z[atom_cnt]); //printf("%-15s %-5u %-10.8f %-10.8f %-10.8f\n", "grad_intra", atom_cnt, gradient_intra_x[atom_cnt], gradient_intra_y[atom_cnt], gradient_intra_z[atom_cnt]); //printf("%-15s %-5u %-10.8f %-10.8f %-10.8f\n", "calc_coords", atom_cnt, calc_coords_x[atom_cnt], calc_coords_y[atom_cnt], calc_coords_z[atom_cnt]); } barrier(CLK_LOCAL_MEM_FENCE); // ------------------------------------------ // Obtaining translation-related gradients // ------------------------------------------ if (get_local_id(0) == 0) { for (uint lig_atom_id = 0; lig_atom_id PI_FLOAT) ? true: false; #if defined (DEBUG_GRAD_ROTATION_GENES) printf("%-30s %-10.5f %-10.5f %-10.5f\n", "current_axisangle (1,2,3): ", current_phi, current_theta, current_rotangle); #endif // This is where we are in quaternion space // current_q = oclacube_to_quaternion(angles) float4 current_q; // Axis of rotation float rotaxis_x = native_sin(current_theta) * native_cos(current_phi); float rotaxis_y = native_sin(current_theta) * native_sin(current_phi); float rotaxis_z = native_cos(current_theta); float ang; ang = current_rotangle * 0.5f; current_q.w = native_cos(ang); current_q.x = rotaxis_x * native_sin(ang); current_q.y = rotaxis_y * native_sin(ang); current_q.z = rotaxis_z * native_sin(ang); #if defined (DEBUG_GRAD_ROTATION_GENES) printf("%-30s %-10.8f %-10.8f %-10.8f %-10.8f\n", "current_q (w,x,y,z): ", current_q.w, current_q.x, current_q.y, current_q.z); #endif // This is where we want to be in quaternion space float4 target_q; // target_q = rotation.q_mult(q, current_q) // Derived from autodockdev/rotation.py/q_mult() // In our terms means q_mult(quat_{w|x|y|z}, current_q{w|x|y|z}) target_q.w = quat_torque.w*current_q.w - quat_torque.x*current_q.x - quat_torque.y*current_q.y - quat_torque.z*current_q.z;// w target_q.x = quat_torque.w*current_q.x + quat_torque.x*current_q.w + quat_torque.y*current_q.z - quat_torque.z*current_q.y;// x target_q.y = quat_torque.w*current_q.y + quat_torque.y*current_q.w + quat_torque.z*current_q.x - quat_torque.x*current_q.z;// y target_q.z = quat_torque.w*current_q.z + quat_torque.z*current_q.w + quat_torque.x*current_q.y - quat_torque.y*current_q.x;// z #if defined (DEBUG_GRAD_ROTATION_GENES) printf("%-30s %-10.8f %-10.8f %-10.8f %-10.8f\n", "target_q (w,x,y,z): ", target_q.w, target_q.x, target_q.y, target_q.z); #endif // This is where we want to be in the orientation axis-angle space float target_phi, target_theta, target_rotangle; // target_oclacube = quaternion_to_oclacube(target_q, theta_larger_than_pi) // Derived from autodockdev/motions.py/quaternion_to_oclacube() // In our terms means quaternion_to_oclacube(target_q{w|x|y|z}, theta_larger_than_pi) ang = acos(target_q.w); target_rotangle = 2.0f * ang; float inv_sin_ang = native_recip(native_sin(ang)); rotaxis_x = target_q.x * inv_sin_ang; rotaxis_y = target_q.y * inv_sin_ang; rotaxis_z = target_q.z * inv_sin_ang; target_theta = acos(rotaxis_z); if (is_theta_gt_pi == false) { target_phi = remainder((atan2( rotaxis_y, rotaxis_x) + PI_TIMES_2), PI_TIMES_2); } else { target_phi = remainder((atan2(-rotaxis_y, -rotaxis_x) + PI_TIMES_2), PI_TIMES_2); target_theta = PI_TIMES_2 - target_theta; } #if defined (DEBUG_GRAD_ROTATION_GENES) printf("%-30s %-10.8f %-10.8f %-10.8f\n", "target_axisangle (1,2,3) - after mapping: ", target_phi, target_theta, target_rotangle); #endif // The infinitesimal rotation will produce an infinitesimal displacement // in shoemake space. This is to guarantee that the direction of // the displacement in shoemake space is not distorted. // The correct amount of displacement in shoemake space is obtained // by multiplying the infinitesimal displacement by shoemake_scaling: //float shoemake_scaling = native_divide(torque_length, INFINITESIMAL_RADIAN/*infinitesimal_radian*/); float orientation_scaling = torque_length * INV_INFINITESIMAL_RADIAN; // Derivates in cube3 float grad_phi, grad_theta, grad_rotangle; /* grad_phi = orientation_scaling * (target_phi - current_phi); grad_theta = orientation_scaling * (target_theta - current_theta); grad_rotangle = orientation_scaling * (target_rotangle - current_rotangle); */ grad_phi = orientation_scaling * (remainder(target_phi - current_phi + PI_FLOAT, PI_TIMES_2) - PI_FLOAT); grad_theta = orientation_scaling * (remainder(target_theta - current_theta + PI_FLOAT, PI_TIMES_2) - PI_FLOAT); grad_rotangle = orientation_scaling * (remainder(target_rotangle - current_rotangle + PI_FLOAT, PI_TIMES_2) - PI_FLOAT); #if defined (DEBUG_GRAD_ROTATION_GENES) printf("%-30s %-10.8f %-10.8f %-10.8f\n", "grad_axisangle (1,2,3) - before emp. scaling: ", grad_phi, grad_theta, grad_rotangle); #endif // Corrections of derivatives // Constant arrays have 1000 elements. // Each array spans approximatedly from 0.0 to 2*PI. // The distance between each x-point (angle-delta) is 2*PI/1000. const float angle_delta = 0.00628353f; const float inv_angle_delta = 159.154943; // Correcting theta gradients interpolating // values from correction look-up-tables // (X0,Y0) and (X1,Y1) are known points // How to find the Y value in the straight line between Y0 and Y1, // corresponding to a certain X? /* | dependence_on_theta_const | dependence_on_rotangle_const | | | Y1 | | Y=? | Y0 |_________________________________ angle_const X0 X X1 */ // Finding the index-position of "grad_delta" in the "angle_const" array //uint index_theta = floor(native_divide(current_theta - angle_const[0], angle_delta)); //uint index_rotangle = floor(native_divide(current_rotangle - angle_const[0], angle_delta)); uint index_theta = floor((current_theta - angle_const[0]) * inv_angle_delta); uint index_rotangle = floor((current_rotangle - angle_const[0]) * inv_angle_delta); // Interpolating theta values // X0 -> index - 1 // X1 -> index + 1 // Expresed as weighted average: // Y = [Y0 * ((X1 - X) / (X1-X0))] + [Y1 * ((X - X0) / (X1-X0))] // Simplified for GPU (less terms): // Y = [Y0 * (X1 - X) + Y1 * (X - X0)] / (X1 - X0) // Taking advantage of constant: // Y = [Y0 * (X1 - X) + Y1 * (X - X0)] * inv_angle_delta float X0_theta, Y0_theta; float X1_theta, Y1_theta; float X_theta; float dependence_on_theta; //Y = dependence_on_theta X_theta = current_theta; // Using interpolation on out-of-bounds elements results in hang if (index_theta <= 0) { //printf("WARNING: index_theta: %u\n", index_theta); dependence_on_theta = dependence_on_theta_const[0]; //printf("%f\n",dependence_on_theta_const[0]); } else if (index_theta >= 999){ //printf("WARNING: index_theta: %u\n", index_theta); dependence_on_theta = dependence_on_theta_const[999]; //printf("%f\n",dependence_on_theta_const[999]); } else { X0_theta = angle_const[index_theta]; Y0_theta = dependence_on_theta_const[index_theta]; X1_theta = angle_const[index_theta+1]; Y1_theta = dependence_on_theta_const[index_theta+1]; } dependence_on_theta = (Y0_theta * (X1_theta-X_theta) + Y1_theta * (X_theta-X0_theta)) * inv_angle_delta; // Interpolating rotangle values float X0_rotangle, Y0_rotangle; float X1_rotangle, Y1_rotangle; float X_rotangle; float dependence_on_rotangle; //Y = dependence_on_rotangle X_rotangle = current_rotangle; // Using interpolation on previous and/or next elements results in hang if (index_rotangle <= 0) { //printf("WARNING: index_rotangle: %u\n", index_rotangle); dependence_on_rotangle = dependence_on_rotangle_const[0]; //printf("%f\n",dependence_on_rotangle_const[0]); } else if (index_rotangle >= 999){ //printf("WARNING: index_rotangle: %u\n", index_rotangle); dependence_on_rotangle = dependence_on_rotangle_const[999]; //printf("%f\n",dependence_on_rotangle_const[999]); } else { X0_rotangle = angle_const[index_rotangle]; Y0_rotangle = dependence_on_rotangle_const[index_rotangle]; X1_rotangle = angle_const[index_rotangle+1]; Y1_rotangle = dependence_on_rotangle_const[index_rotangle+1]; } dependence_on_rotangle = (Y0_rotangle * (X1_rotangle-X_rotangle) + Y1_rotangle * (X_rotangle-X0_rotangle)) * inv_angle_delta; #if defined (DEBUG_GRAD_ROTATION_GENES) printf("%-30s %-10.8f %-10.8f %-10.8f\n", "grad_axisangle (1,2,3) - after emp. scaling: ", grad_phi, grad_theta, grad_rotangle); #endif // Setting gradient rotation-related genotypes in cube // Multiplicating by DEG_TO_RAD is to make it uniform to DEG (see torsion gradients) gradient_genotype[3] = native_divide(grad_phi, (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD; gradient_genotype[4] = native_divide(grad_theta, dependence_on_rotangle) * DEG_TO_RAD; gradient_genotype[5] = grad_rotangle * DEG_TO_RAD; } // ------------------------------------------ // Obtaining torsion-related gradients // ------------------------------------------ //---------------------------------- // fastergrad //---------------------------------- /* if (get_local_id(0) == 2) { for (uint rotbond_id = 0; rotbond_id < dockpars_num_of_genes-6; rotbond_id ++) { */ for (uint rotbond_id = get_local_id(0); rotbond_id < dockpars_num_of_genes-6; rotbond_id +=NUM_OF_THREADS_PER_BLOCK) { //---------------------------------- // Querying ids of atoms belonging to the rotatable bond in question int atom1_id = rotbonds_const[2*rotbond_id]; int atom2_id = rotbonds_const[2*rotbond_id+1]; float3 atomRef_coords; atomRef_coords.x = calc_coords_x[atom1_id]; atomRef_coords.y = calc_coords_y[atom1_id]; atomRef_coords.z = calc_coords_z[atom1_id]; #if defined (DEBUG_GRAD_TORSION_GENES) printf("%-15s %-10u\n", "rotbond_id: ", rotbond_id); printf("%-15s %-10i\n", "atom1_id: ", atom1_id); printf("%-15s %-10.8f %-10.8f %-10.8f\n", "atom1_coords: ", calc_coords_x[atom1_id], calc_coords_y[atom1_id], calc_coords_z[atom1_id]); printf("%-15s %-10i\n", "atom2_id: ", atom2_id); printf("%-15s %-10.8f %-10.8f %-10.8f\n", "atom2_coords: ", calc_coords_x[atom2_id], calc_coords_y[atom2_id], calc_coords_z[atom2_id]); printf("\n"); #endif float3 rotation_unitvec; /* rotation_unitvec.x = rotbonds_unit_vectors_const[3*rotbond_id]; rotation_unitvec.y = rotbonds_unit_vectors_const[3*rotbond_id+1]; rotation_unitvec.z = rotbonds_unit_vectors_const[3*rotbond_id+2]; */ rotation_unitvec.x = calc_coords_x[atom2_id] - calc_coords_x[atom1_id]; rotation_unitvec.y = calc_coords_y[atom2_id] - calc_coords_y[atom1_id]; rotation_unitvec.z = calc_coords_z[atom2_id] - calc_coords_z[atom1_id]; rotation_unitvec = fast_normalize(rotation_unitvec); // Torque of torsions float3 torque_tor; torque_tor.x = 0.0f; torque_tor.y = 0.0f; torque_tor.z = 0.0f; // Iterating over each ligand atom that rotates // if the bond in question rotates for (uint rotable_atom_cnt = 0; rotable_atom_cnt