calcenergy.cl 44.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*

OCLADock, an OpenCL implementation of AutoDock 4.2 running a Lamarckian Genetic Algorithm
Copyright (C) 2017 TU Darmstadt, Embedded Systems and Applications Group, Germany. All rights reserved.

AutoDock is a Trade Mark of the Scripps Research Institute.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

*/


25
#include "calcenergy_basic.h"
Leonardo Solis's avatar
Leonardo Solis committed
26
27
28
29

// All related pragmas are in defines.h (accesible by host and device code)

void gpu_calc_energy(	    int    dockpars_rotbondlist_length,
Leonardo Solis's avatar
Leonardo Solis committed
30
31
32
33
			    char   dockpars_num_of_atoms,
			    char   dockpars_gridsize_x,
			    char   dockpars_gridsize_y,
			    char   dockpars_gridsize_z,
Leonardo Solis's avatar
Leonardo Solis committed
34
		#if defined (RESTRICT_ARGS)
Leonardo Solis's avatar
Leonardo Solis committed
35
			__global const float* restrict dockpars_fgrids, // cannot be allocated in __constant (too large)
Leonardo Solis's avatar
Leonardo Solis committed
36
		#else
37
			__global const float* dockpars_fgrids, 		// cannot be allocated in __constant (too large)
Leonardo Solis's avatar
Leonardo Solis committed
38
		#endif
Leonardo Solis's avatar
Leonardo Solis committed
39
40
41
42
43
44
		            char   dockpars_num_of_atypes,
		            int    dockpars_num_of_intraE_contributors,
			    float  dockpars_grid_spacing,
			    float  dockpars_coeff_elec,
			    float  dockpars_qasp,
			    float  dockpars_coeff_desolv,
Leonardo Solis's avatar
Leonardo Solis committed
45

46
47
48
49
                    // Some OpenCL compilers don't allow declaring 
		    // local variables within non-kernel functions.
		    // These local variables must be declared in a kernel, 
		    // and then passed to non-kernel functions.
50
51
52
53
		    __local float* genotype,
		    __local float* energy,
		    __local int*   run_id,

Leonardo Solis's avatar
Leonardo Solis committed
54
55
56
57
		    __local float* calc_coords_x,
		    __local float* calc_coords_y,
		    __local float* calc_coords_z,
		    __local float* partial_energies,
Leonardo Solis's avatar
Leonardo Solis committed
58

59
60
61
62
63
64
65
66
67
68
69
70
71
	         __constant float* atom_charges_const,
                 __constant char*  atom_types_const,
                 __constant char*  intraE_contributors_const,
                 __constant float* VWpars_AC_const,
                 __constant float* VWpars_BD_const,
                 __constant float* dspars_S_const,
                 __constant float* dspars_V_const,
                 __constant int*   rotlist_const,
                 __constant float* ref_coords_x_const,
                 __constant float* ref_coords_y_const,
                 __constant float* ref_coords_z_const,
                 __constant float* rotbonds_moving_vectors_const,
                 __constant float* rotbonds_unit_vectors_const,
72
73
74
75
76
                 __constant float* ref_orientation_quats_const,

		    // Gradient-related arguments
		    // Calculate gradients (forces) for intermolecular energy
		    // Derived from autodockdev/maps.py
77
		
78
79
80
		    // "is_enabled_gradient_calc": enables gradient calculation.
		    // In Genetic-Generation: no need for gradients
		    // In Gradient-Minimizer: must calculate gradients
81
82
83
		    __local bool*  is_enabled_gradient_calc,
	    	    __local float* gradient_inter_x,
	            __local float* gradient_inter_y,
84
85
	            __local float* gradient_inter_z,
		    __local float* gradient_genotype	
86
			
Leonardo Solis's avatar
Leonardo Solis committed
87
88
89
90
91
92
93
94
95
)

//The GPU device function calculates the energy of the entity described by genotype, dockpars and the liganddata
//arrays in constant memory and returns it in the energy parameter. The parameter run_id has to be equal to the ID
//of the run whose population includes the current entity (which can be determined with blockIdx.x), since this
//determines which reference orientation should be used.
{
	partial_energies[get_local_id(0)] = 0.0f;

96
97
98
99
100
	// -------------------------------------------------------------------
	// Calculate gradients (forces) for intermolecular energy
	// Derived from autodockdev/maps.py
	// -------------------------------------------------------------------
	if (*is_enabled_gradient_calc) {
101
102
103
104
105
106
		for (uint atom_id = get_local_id(0);
		          atom_id < dockpars_num_of_atoms;
		          atom_id+= NUM_OF_THREADS_PER_BLOCK) {
			gradient_inter_x[atom_id] = 0.0f;
			gradient_inter_y[atom_id] = 0.0f;
			gradient_inter_z[atom_id] = 0.0f;
107
108
109
		}
	}

110
#if 0
111
112
113
114
	// Rotational genes in the Shoemake space expressed in radians
	float u1, u2, u3; 
	
	u1 = genotype[3];
115
116
117
	u2 = genotype[4]/**DEG_TO_RAD*/;
	u3 = genotype[5]/**DEG_TO_RAD*/;
#endif
Leonardo Solis's avatar
Leonardo Solis committed
118

119
#if defined (IMPROVE_GRID)
Leonardo Solis's avatar
Leonardo Solis committed
120
121
	// INTERMOLECULAR for-loop (intermediate results)
	// It stores a product of two chars
122
	//uint mul_tmp;
Leonardo Solis's avatar
Leonardo Solis committed
123

124
125
126
	uchar g1 = dockpars_gridsize_x;
	uint  g2 = dockpars_gridsize_x * dockpars_gridsize_y;
  	uint  g3 = dockpars_gridsize_x * dockpars_gridsize_y * dockpars_gridsize_z;
Leonardo Solis's avatar
Leonardo Solis committed
127
#else
128

129
#endif
130

Leonardo Solis's avatar
Leonardo Solis committed
131
	// ================================================
132
	// CALCULATE ATOMIC POSITIONS AFTER ROTATIONS
Leonardo Solis's avatar
Leonardo Solis committed
133
	// ================================================
134
135
136
	for (uint rotation_counter = get_local_id(0);
	          rotation_counter < dockpars_rotbondlist_length;
	          rotation_counter+=NUM_OF_THREADS_PER_BLOCK)
Leonardo Solis's avatar
Leonardo Solis committed
137
	{
138
		int rotation_list_element = rotlist_const[rotation_counter];
Leonardo Solis's avatar
Leonardo Solis committed
139

140
		if ((rotation_list_element & RLIST_DUMMY_MASK) == 0)	// If not dummy rotation
Leonardo Solis's avatar
Leonardo Solis committed
141
		{
142
143
144
145
			uint atom_id = rotation_list_element & RLIST_ATOMID_MASK;

			// Capturing atom coordinates
			float atom_to_rotate[3];
Leonardo Solis's avatar
Leonardo Solis committed
146

147
			if ((rotation_list_element & RLIST_FIRSTROT_MASK) != 0)	// If first rotation of this atom
Leonardo Solis's avatar
Leonardo Solis committed
148
149
150
151
152
153
154
155
156
157
158
159
			{
				atom_to_rotate[0] = ref_coords_x_const[atom_id];
				atom_to_rotate[1] = ref_coords_y_const[atom_id];
				atom_to_rotate[2] = ref_coords_z_const[atom_id];
			}
			else
			{
				atom_to_rotate[0] = calc_coords_x[atom_id];
				atom_to_rotate[1] = calc_coords_y[atom_id];
				atom_to_rotate[2] = calc_coords_z[atom_id];
			}

160
161
162
163
164
165
166
			// Capturing rotation vectors and angle
			float rotation_movingvec[3];

			float quatrot_left_x, quatrot_left_y, quatrot_left_z, quatrot_left_q;
			float quatrot_temp_x, quatrot_temp_y, quatrot_temp_z, quatrot_temp_q;

			if ((rotation_list_element & RLIST_GENROT_MASK) != 0)	// If general rotation
Leonardo Solis's avatar
Leonardo Solis committed
167
			{
168
169
170
171
172
				// -------------------------------------------------------------------
				// Replacing rotation genes: from spherical space to Shoemake space
				// gene [0:2]: translation -> kept as original x, y, z
				// gene [3:5]: rotation    -> transformed into Shoemake (u1: adimensional, u2&u3: sexagesimal)
				// gene [6:N]: torsions	   -> kept as original angles	(all in sexagesimal)
173

174
175
176
				// Shoemake ranges:
				// u1: [0, 1]
				// u2: [0: 2PI] or [0: 360]
177

178
179
180
				// Random generator in the host is changed:
				// LCG (original, myrand()) -> CPP std (rand())
				// -------------------------------------------------------------------
181

182
183
184
				// Transforming Shoemake (u1, u2, u3) genes into quaternions
				// FIXME: add precision choices with preprocessor directives: 
				// NATIVE_PRECISION, HALF_PRECISION, Full precision
185

186
187
188
189
190
191
192
193
194
195
196
197
				// Rotational genes in the Shoemake space expressed in radians
				float u1, u2, u3; 
	
				u1 = genotype[3];
				u2 = genotype[4]/**DEG_TO_RAD*/;
				u3 = genotype[5]/**DEG_TO_RAD*/;

				// u1, u2, u3 should be within their valid range of [0,1]
				quatrot_left_q = native_sqrt(1 - u1) * native_sin(PI_TIMES_2*u2); 
				quatrot_left_x = native_sqrt(1 - u1) * native_cos(PI_TIMES_2*u2);
				quatrot_left_y = native_sqrt(u1)     * native_sin(PI_TIMES_2*u3);
				quatrot_left_z = native_sqrt(u1)     * native_cos(PI_TIMES_2*u3);
198

Leonardo Solis's avatar
Leonardo Solis committed
199
200
201
202
				rotation_movingvec[0] = genotype[0];
				rotation_movingvec[1] = genotype[1];
				rotation_movingvec[2] = genotype[2];
			}
203
			else	// If rotating around rotatable bond
Leonardo Solis's avatar
Leonardo Solis committed
204
			{
205
				uint rotbond_id = (rotation_list_element & RLIST_RBONDID_MASK) >> RLIST_RBONDID_SHIFT;
Leonardo Solis's avatar
Leonardo Solis committed
206

207
				float rotation_unitvec[3];
Leonardo Solis's avatar
Leonardo Solis committed
208
209
210
				rotation_unitvec[0] = rotbonds_unit_vectors_const[3*rotbond_id];
				rotation_unitvec[1] = rotbonds_unit_vectors_const[3*rotbond_id+1];
				rotation_unitvec[2] = rotbonds_unit_vectors_const[3*rotbond_id+2];
211
				float rotation_angle = genotype[6+rotbond_id]*DEG_TO_RAD;
Leonardo Solis's avatar
Leonardo Solis committed
212
213
214
215
216

				rotation_movingvec[0] = rotbonds_moving_vectors_const[3*rotbond_id];
				rotation_movingvec[1] = rotbonds_moving_vectors_const[3*rotbond_id+1];
				rotation_movingvec[2] = rotbonds_moving_vectors_const[3*rotbond_id+2];

217
218
				// Performing additionally the first movement which 
				// is needed only if rotating around rotatable bond
Leonardo Solis's avatar
Leonardo Solis committed
219
220
221
				atom_to_rotate[0] -= rotation_movingvec[0];
				atom_to_rotate[1] -= rotation_movingvec[1];
				atom_to_rotate[2] -= rotation_movingvec[2];
222
223
224
225

				// Transforming torsion angles into quaternions
				// FIXME: add precision choices with preprocessor directives: 
				// NATIVE_PRECISION, HALF_PRECISION, Full precision
226
227
228
229
230
231
				rotation_angle  = native_divide(rotation_angle, 2.0f);
				float sin_angle = native_sin(rotation_angle);
				quatrot_left_q  = native_cos(rotation_angle);
				quatrot_left_x  = sin_angle*rotation_unitvec[0];
				quatrot_left_y  = sin_angle*rotation_unitvec[1];
				quatrot_left_z  = sin_angle*rotation_unitvec[2];
Leonardo Solis's avatar
Leonardo Solis committed
232
233
			}

234
235
236
237
			// Performing rotation
			if ((rotation_list_element & RLIST_GENROT_MASK) != 0)	// If general rotation,
										// two rotations should be performed
										// (multiplying the quaternions)
Leonardo Solis's avatar
Leonardo Solis committed
238
			{
239
240
				// Calculating quatrot_left*ref_orientation_quats_const,
				// which means that reference orientation rotation is the first
Leonardo Solis's avatar
Leonardo Solis committed
241
242
243
244
245
246
				quatrot_temp_q = quatrot_left_q;
				quatrot_temp_x = quatrot_left_x;
				quatrot_temp_y = quatrot_left_y;
				quatrot_temp_z = quatrot_left_z;

				quatrot_left_q = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)]-
247
248
249
						 quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+1]-
						 quatrot_temp_y*ref_orientation_quats_const[4*(*run_id)+2]-
						 quatrot_temp_z*ref_orientation_quats_const[4*(*run_id)+3];
Leonardo Solis's avatar
Leonardo Solis committed
250
				quatrot_left_x = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+1]+
251
252
253
						 ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_x+
						 quatrot_temp_y*ref_orientation_quats_const[4*(*run_id)+3]-
						 ref_orientation_quats_const[4*(*run_id)+2]*quatrot_temp_z;
Leonardo Solis's avatar
Leonardo Solis committed
254
				quatrot_left_y = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+2]+
255
256
257
						 ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_y+
						 ref_orientation_quats_const[4*(*run_id)+1]*quatrot_temp_z-
						 quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+3];
Leonardo Solis's avatar
Leonardo Solis committed
258
				quatrot_left_z = quatrot_temp_q*ref_orientation_quats_const[4*(*run_id)+3]+
259
260
261
						 ref_orientation_quats_const[4*(*run_id)]*quatrot_temp_z+
						 quatrot_temp_x*ref_orientation_quats_const[4*(*run_id)+2]-
						 ref_orientation_quats_const[4*(*run_id)+1]*quatrot_temp_y;
Leonardo Solis's avatar
Leonardo Solis committed
262
263
264
			}

			quatrot_temp_q = 0 -
265
266
267
					 quatrot_left_x*atom_to_rotate [0] -
					 quatrot_left_y*atom_to_rotate [1] -
					 quatrot_left_z*atom_to_rotate [2];
Leonardo Solis's avatar
Leonardo Solis committed
268
			quatrot_temp_x = quatrot_left_q*atom_to_rotate [0] +
269
270
					 quatrot_left_y*atom_to_rotate [2] -
					 quatrot_left_z*atom_to_rotate [1];
Leonardo Solis's avatar
Leonardo Solis committed
271
			quatrot_temp_y = quatrot_left_q*atom_to_rotate [1] -
272
273
					 quatrot_left_x*atom_to_rotate [2] +
					 quatrot_left_z*atom_to_rotate [0];
Leonardo Solis's avatar
Leonardo Solis committed
274
			quatrot_temp_z = quatrot_left_q*atom_to_rotate [2] +
275
276
					 quatrot_left_x*atom_to_rotate [1] -
					 quatrot_left_y*atom_to_rotate [0];
Leonardo Solis's avatar
Leonardo Solis committed
277
278

			atom_to_rotate [0] = 0 -
279
280
281
282
					  quatrot_temp_q*quatrot_left_x +
					  quatrot_temp_x*quatrot_left_q -
					  quatrot_temp_y*quatrot_left_z +
					  quatrot_temp_z*quatrot_left_y;
Leonardo Solis's avatar
Leonardo Solis committed
283
			atom_to_rotate [1] = 0 -
284
285
286
287
					  quatrot_temp_q*quatrot_left_y +
					  quatrot_temp_x*quatrot_left_z +
					  quatrot_temp_y*quatrot_left_q -
					  quatrot_temp_z*quatrot_left_x;
Leonardo Solis's avatar
Leonardo Solis committed
288
			atom_to_rotate [2] = 0 -
289
290
291
292
					  quatrot_temp_q*quatrot_left_z -
					  quatrot_temp_x*quatrot_left_y +
					  quatrot_temp_y*quatrot_left_x +
					  quatrot_temp_z*quatrot_left_q;
Leonardo Solis's avatar
Leonardo Solis committed
293

294
			// Performing final movement and storing values
Leonardo Solis's avatar
Leonardo Solis committed
295
296
297
298
299
300
301
302
303
304
			calc_coords_x[atom_id] = atom_to_rotate [0] + rotation_movingvec[0];
			calc_coords_y[atom_id] = atom_to_rotate [1] + rotation_movingvec[1];
			calc_coords_z[atom_id] = atom_to_rotate [2] + rotation_movingvec[2];

		} // End if-statement not dummy rotation

		barrier(CLK_LOCAL_MEM_FENCE);

	} // End rotation_counter for-loop

305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
	// -------------------------------------------------------------------
	// Calculate gradients (forces) for intermolecular energy
	// Derived from autodockdev/maps.py
	// -------------------------------------------------------------------
	// Variables to store gradient of 
	// the intermolecular energy per each ligand atom

	// Some OpenCL compilers don't allow declaring 
	// local variables within non-kernel functions.
	// These local variables must be declared in a kernel, 
	// and then passed to non-kernel functions.
	/*
	__local float gradient_inter_x[MAX_NUM_OF_ATOMS];
	__local float gradient_inter_y[MAX_NUM_OF_ATOMS];
	__local float gradient_inter_z[MAX_NUM_OF_ATOMS];
	*/

	// Deltas dx, dy, dz are already normalized 
	// (by host/src/getparameters.cpp) in OCLaDock.
	// The correspondance between vertices in xyz axes is:
	// 0, 1, 2, 3, 4, 5, 6, 7  and  000, 100, 010, 001, 101, 110, 011, 111
	/*
            deltas: (x-x0)/(x1-x0), (y-y0...
            vertices: (000, 100, 010, 001, 101, 110, 011, 111)        

                  Z
                  '
                  3 - - - - 6
                 /.        /|
                4 - - - - 7 |
                | '       | |
                | 0 - - - + 2 -- Y
                '/        |/
                1 - - - - 5
               /
              X
	*/

	// Intermediate values for vectors in x-direction
	float x10, x52, x43, x76;
	float vx_z0, vx_z1;

	// Intermediate values for vectors in y-direction
	float y20, y51, y63, y74;
	float vy_z0, vy_z1;

	// Intermediate values for vectors in z-direction
	float z30, z41, z62, z75;
	float vz_y0, vz_y1;
	// -------------------------------------------------------------------

Leonardo Solis's avatar
Leonardo Solis committed
356
357
358
	// ================================================
	// CALCULATE INTERMOLECULAR ENERGY
	// ================================================
359
360
361
	for (uint atom_id = get_local_id(0);
	          atom_id < dockpars_num_of_atoms;
	          atom_id+= NUM_OF_THREADS_PER_BLOCK)
Leonardo Solis's avatar
Leonardo Solis committed
362
	{
363
364
365
366
367
		uint atom_typeid = atom_types_const[atom_id];
		float x = calc_coords_x[atom_id];
		float y = calc_coords_y[atom_id];
		float z = calc_coords_z[atom_id];
		float q = atom_charges_const[atom_id];
Leonardo Solis's avatar
Leonardo Solis committed
368
369

		if ((x < 0) || (y < 0) || (z < 0) || (x >= dockpars_gridsize_x-1)
370
371
				                  || (y >= dockpars_gridsize_y-1)
						  || (z >= dockpars_gridsize_z-1)){
Leonardo Solis's avatar
Leonardo Solis committed
372
			partial_energies[get_local_id(0)] += 16777216.0f; //100000.0f;
373
374
375
376
377
378
379
380
			
			// -------------------------------------------------------------------
			// Calculate gradients (forces) for intermolecular energy
			// Derived from autodockdev/maps.py
			// -------------------------------------------------------------------

			if (*is_enabled_gradient_calc) {
				// Penalty values are valid as long as they are high
381
382
383
				gradient_inter_x[atom_id] += 16777216.0f;
				gradient_inter_y[atom_id] += 16777216.0f;
				gradient_inter_z[atom_id] += 16777216.0f;
384
			}
385
			// -------------------------------------------------------------------
Leonardo Solis's avatar
Leonardo Solis committed
386
387
388
		}
		else
		{
389
390
391
392
393
394
395
396
397
398
399
400
401
			// Get coordinates
			int x_low  = (int)floor(x); 
			int y_low  = (int)floor(y); 
			int z_low  = (int)floor(z);
			int x_high = (int)ceil(x); 
			int y_high = (int)ceil(y); 
			int z_high = (int)ceil(z);
			float dx = x - x_low; 
			float dy = y - y_low; 
			float dz = z - z_low;

			// Calculate interpolation weights
			float weights[2][2][2];
Leonardo Solis's avatar
Leonardo Solis committed
402
403
404
405
406
407
408
409
410
			weights [0][0][0] = (1-dx)*(1-dy)*(1-dz);
			weights [1][0][0] = dx*(1-dy)*(1-dz);
			weights [0][1][0] = (1-dx)*dy*(1-dz);
			weights [1][1][0] = dx*dy*(1-dz);
			weights [0][0][1] = (1-dx)*(1-dy)*dz;
			weights [1][0][1] = dx*(1-dy)*dz;
			weights [0][1][1] = (1-dx)*dy*dz;
			weights [1][1][1] = dx*dy*dz;

411
			// Capture affinity values
Leonardo Solis's avatar
Leonardo Solis committed
412
#if defined (IMPROVE_GRID)
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
			uint ylow_times_g1  = y_low*g1;
			uint yhigh_times_g1 = y_high*g1;
		  	uint zlow_times_g2  = z_low*g2;
			uint zhigh_times_g2 = z_high*g2;

			// Grid offset
			uint offset_cube_000 = x_low  + ylow_times_g1  + zlow_times_g2;
			uint offset_cube_100 = x_high + ylow_times_g1  + zlow_times_g2;
			uint offset_cube_010 = x_low  + yhigh_times_g1 + zlow_times_g2;
			uint offset_cube_110 = x_high + yhigh_times_g1 + zlow_times_g2;
			uint offset_cube_001 = x_low  + ylow_times_g1  + zhigh_times_g2;
			uint offset_cube_101 = x_high + ylow_times_g1  + zhigh_times_g2;
			uint offset_cube_011 = x_low  + yhigh_times_g1 + zhigh_times_g2;
			uint offset_cube_111 = x_high + yhigh_times_g1 + zhigh_times_g2;

			uint mul_tmp = atom_typeid*g3;

			float cube[2][2][2];
			cube [0][0][0] = *(dockpars_fgrids + offset_cube_000 + mul_tmp);
			cube [1][0][0] = *(dockpars_fgrids + offset_cube_100 + mul_tmp);
			cube [0][1][0] = *(dockpars_fgrids + offset_cube_010 + mul_tmp);
		        cube [1][1][0] = *(dockpars_fgrids + offset_cube_110 + mul_tmp);
		        cube [0][0][1] = *(dockpars_fgrids + offset_cube_001 + mul_tmp);
			cube [1][0][1] = *(dockpars_fgrids + offset_cube_101 + mul_tmp);
                        cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp);
                        cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp);
439
440
441
442
443
444
445
446
447
448

			// -------------------------------------------------------------------
			// Calculate gradients (forces) corresponding to 
			// "atype" intermolecular energy
			// Derived from autodockdev/maps.py
			// -------------------------------------------------------------------

			if (*is_enabled_gradient_calc) {
				// vector in x-direction
				/*
449
			 	x10 = grid[int(vertices[1])] - grid[int(vertices[0])] # z = 0
450
451
452
453
454
455
456
457
458
459
460
461
462
463
				x52 = grid[int(vertices[5])] - grid[int(vertices[2])] # z = 0
				x43 = grid[int(vertices[4])] - grid[int(vertices[3])] # z = 1
				x76 = grid[int(vertices[7])] - grid[int(vertices[6])] # z = 1
				vx_z0 = (1-yd) * x10 + yd * x52     #  z = 0
				vx_z1 = (1-yd) * x43 + yd * x76     #  z = 1
				gradient[0] = (1-zd) * vx_z0 + zd * vx_z1 
				*/

				x10 = cube [1][0][0] - cube [0][0][0]; // z = 0
				x52 = cube [1][1][0] - cube [0][1][0]; // z = 0
				x43 = cube [1][0][1] - cube [0][0][1]; // z = 1
				x76 = cube [1][1][1] - cube [0][1][1]; // z = 1
				vx_z0 = (1 - dy) * x10 + dy * x52;     // z = 0
				vx_z1 = (1 - dy) * x43 + dy * x76;     // z = 1
464
				gradient_inter_x[atom_id] += (1 - dz) * vx_z0 + dz * vx_z1;
465
466
467
468
469
470

				// vector in y-direction
				/*
				y20 = grid[int(vertices[2])] - grid[int(vertices[0])] # z = 0
				y51 = grid[int(vertices[5])] - grid[int(vertices[1])] # z = 0
				y63 = grid[int(vertices[6])] - grid[int(vertices[3])] # z = 1
471
472
473
474
			for (uint rotation_counter = get_local_id(0);
	          rotation_counter < dockpars_rotbondlist_length;
	          rotation_counter+=NUM_OF_THREADS_PER_BLOCK)
	{	y74 = grid[int(vertices[7])] - grid[int(vertices[4])] # z = 1
475
476
477
478
479
480
481
482
483
484
485
				vy_z0 = (1-xd) * y20 + xd * y51     #  z = 0
				vy_z1 = (1-xd) * y63 + xd * y74     #  z = 1
				gradient[1] = (1-zd) * vy_z0 + zd * vy_z1
				*/

				y20 = cube[0][1][0] - cube [0][0][0];	// z = 0
				y51 = cube[1][1][0] - cube [1][0][0];	// z = 0
				y63 = cube[0][1][1] - cube [0][0][1];	// z = 1
				y74 = cube[1][1][1] - cube [1][0][1];	// z = 1
				vy_z0 = (1 - dx) * y20 + dx * y51;	// z = 0
				vy_z1 = (1 - dx) * y63 + dx * y74;	// z = 1
486
				gradient_inter_y[atom_id] += (1 - dz) * vy_z0 + dz * vy_z1;
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504

				// vectors in z-direction
				/*	
				z30 = grid[int(vertices[3])] - grid[int(vertices[0])] # y = 0
				z41 = grid[int(vertices[4])] - grid[int(vertices[1])] # y = 0
				z62 = grid[int(vertices[6])] - grid[int(vertices[2])] # y = 1
				z75 = grid[int(vertices[7])] - grid[int(vertices[5])] # y = 1
				vz_y0 = (1-xd) * z30 + xd * z41     # y = 0
				vz_y1 = (1-xd) * z62 + xd * z75     # y = 1
				gradient[2] = (1-yd) * vz_y0 + yd * vz_y1
				*/

				z30 = cube [0][0][1] - cube [0][0][0];	// y = 0
				z41 = cube [1][0][1] - cube [1][0][0];	// y = 0
				z62 = cube [0][1][1] - cube [0][1][0];	// y = 1 
				z75 = cube [1][1][1] - cube [1][1][0];	// y = 1
				vz_y0 = (1 - dx) * z30 + dx * z41;	// y = 0
				vz_y1 = (1 - dx) * z62 + dx * z75;	// y = 1
505
				gradient_inter_z[atom_id] += (1 - dy) * vz_y0 + dy * vz_y1;
506
			}
507
			// -------------------------------------------------------------------	
Leonardo Solis's avatar
Leonardo Solis committed
508
#else
509
510
511
512
513
514
515
			// -------------------------------------------------------------------
			// FIXME: this block within the "#else" preprocessor directive 
			// provides NO gradient corresponding to "atype" intermolecular energy
			// -------------------------------------------------------------------	

			cube [0][0][0] = GETGRIDVALUE(dockpars_fgrids, 
						      dockpars_gridsize_x,
516
						      dockparsdockpars_num_of_atoms;_gridsize_y, 
517
						      dockpars_gridsize_z,
518
						      atom_typeid, z_low, y_low, x_low);
519
520
521
522
			cube [1][0][0] = GETGRIDVALUE(dockpars_fgrids, 
						      dockpars_gridsize_x,
						      dockpars_gridsize_y, 
						      dockpars_gridsize_z,
523
						      atom_typeid, z_low, y_low, x_high);
524
525
526
527
			cube [0][1][0] = GETGRIDVALUE(dockpars_fgrids, 
						      dockpars_gridsize_x,
						      dockpars_gridsize_y, 
						      dockpars_gridsize_z,
528
						      atom_typeid, z_low, y_high, x_low);
529
			cube [1][1][0] = GETGRIDVALUE(dockpars_fgrids, 
530
531
						      dockpars_gridsize_x,
						      dockpars_gridsize_y, 
532
						      dockpars_gridsize_z,
533
						      atom_typeid, z_low, y_high, x_high);
534
535
536
537
			cube [0][0][1] = GETGRIDVALUE(dockpars_fgrids, 
						      dockpars_gridsize_x,
						      dockpars_gridsize_y, 
						      dockpars_gridsize_z,
538
						      atom_typeid, z_high, y_low, x_low);
539
540
541
542
			cube [1][0][1] = GETGRIDVALUE(dockpars_fgrids, 
						      dockpars_gridsize_x,
						      dockpars_gridsize_y, 
						      dockpars_gridsize_z,
543
						      atom_typeid, z_high, y_low, x_high);
544
545
546
547
			cube [0][1][1] = GETGRIDVALUE(dockpars_fgrids, 
						      dockpars_gridsize_x,
						      dockpars_gridsize_y, 
						      dockpars_gridsize_z,
548
						      atom_typeid, z_high, y_high, x_low);
549
550
551
552
			cube [1][1][1] = GETGRIDVALUE(dockpars_fgrids, 
						      dockpars_gridsize_x,
						      dockpars_gridsize_y, 
						      dockpars_gridsize_z,
553
						      atom_typeid, z_high, y_high, x_high);
Leonardo Solis's avatar
Leonardo Solis committed
554
555
556
557
558
559
#endif

			//calculating affinity energy
			partial_energies[get_local_id(0)] += TRILININTERPOL(cube, weights);

			//capturing electrostatic values
560
			atom_typeid = dockpars_num_of_atypes;
Leonardo Solis's avatar
Leonardo Solis committed
561
562

#if defined (IMPROVE_GRID)
563
564
565
566
567
568
569
570
571
			mul_tmp = atom_typeid*g3;
			cube [0][0][0] = *(dockpars_fgrids + offset_cube_000 + mul_tmp);
			cube [1][0][0] = *(dockpars_fgrids + offset_cube_100 + mul_tmp);
      			cube [0][1][0] = *(dockpars_fgrids + offset_cube_010 + mul_tmp);
      			cube [1][1][0] = *(dockpars_fgrids + offset_cube_110 + mul_tmp);
		       	cube [0][0][1] = *(dockpars_fgrids + offset_cube_001 + mul_tmp);
		        cube [1][0][1] = *(dockpars_fgrids + offset_cube_101 + mul_tmp);
		        cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp);
		        cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp);
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586

			// -------------------------------------------------------------------
			// Calculate gradients (forces) corresponding to 
			// "elec" intermolecular energy
			// Derived from autodockdev/maps.py
			// -------------------------------------------------------------------

			if (*is_enabled_gradient_calc) {
				// vector in x-direction
				x10 = cube [1][0][0] - cube [0][0][0]; // z = 0
				x52 = cube [1][1][0] - cube [0][1][0]; // z = 0
				x43 = cube [1][0][1] - cube [0][0][1]; // z = 1
				x76 = cube [1][1][1] - cube [0][1][1]; // z = 1
				vx_z0 = (1 - dy) * x10 + dy * x52;     // z = 0
				vx_z1 = (1 - dy) * x43 + dy * x76;     // z = 1
587
				gradient_inter_x[atom_id] += (1 - dz) * vx_z0 + dz * vx_z1;
588
589
590
591
592
593
594
595

				// vector in y-direction
				y20 = cube[0][1][0] - cube [0][0][0];	// z = 0
				y51 = cube[1][1][0] - cube [1][0][0];	// z = 0
				y63 = cube[0][1][1] - cube [0][0][1];	// z = 1
				y74 = cube[1][1][1] - cube [1][0][1];	// z = 1
				vy_z0 = (1 - dx) * y20 + dx * y51;	// z = 0
				vy_z1 = (1 - dx) * y63 + dx * y74;	// z = 1
596
				gradient_inter_y[atom_id] += (1 - dz) * vy_z0 + dz * vy_z1;
597
598
599
600
601
602
603
604

				// vectors in z-direction
				z30 = cube [0][0][1] - cube [0][0][0];	// y = 0
				z41 = cube [1][0][1] - cube [1][0][0];	// y = 0
				z62 = cube [0][1][1] - cube [0][1][0];	// y = 1 
				z75 = cube [1][1][1] - cube [1][1][0];	// y = 1
				vz_y0 = (1 - dx) * z30 + dx * z41;	// y = 0
				vz_y1 = (1 - dx) * z62 + dx * z75;	// y = 1
605
				gradient_inter_z[atom_id] += (1 - dy) * vz_y0 + dy * vz_y1;
606
607
			}
			// -------------------------------------------------------------------
Leonardo Solis's avatar
Leonardo Solis committed
608
#else
609
610
			// -------------------------------------------------------------------
			// FIXME: this block within the "#else" preprocessor directive 
611
			// provides NO gradient corresponding to "elec" intermolecular energybas
612
613
614
615
616
617
			// -------------------------------------------------------------------

			cube [0][0][0] = GETGRIDVALUE(dockpars_fgrids, 
						      dockpars_gridsize_x,
						      dockpars_gridsize_y, 
						      dockpars_gridsize_z,
618
					 	      atom_typeid, z_low, y_low, x_low);
619
620
621
622
			cube [1][0][0] = GETGRIDVALUE(dockpars_fgrids, 
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
623
						      atom_typeid, z_low, y_low, x_high);
624
625
626
627
			cube [0][1][0] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
                                                      dockpars_gridsize_z,
628
						      atom_typeid, z_low, y_high, x_low);
629
630
631
			cube [1][1][0] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
632
						      dockpars_gridsize_z,bas
633
						      atom_typeid, z_low, y_high, x_high);
634
635
636
637
			cube [0][0][1] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
638
						      atom_typeid, z_high, y_low, x_low);
639
640
641
642
			cube [1][0][1] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
643
						      atom_typeid, z_high, y_low, x_high);
644
645
646
647
			cube [0][1][1] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
648
						      atom_typeid, z_high, y_high, x_low);
649
650
651
652
			cube [1][1][1] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
653
						      atom_typeid, z_high, y_high, x_high);
Leonardo Solis's avatar
Leonardo Solis committed
654
655
656
657
658
659
#endif

			//calculating electrosatic energy
			partial_energies[get_local_id(0)] += q * TRILININTERPOL(cube, weights);

			//capturing desolvation values
660
			atom_typeid = dockpars_num_of_atypes+1;
Leonardo Solis's avatar
Leonardo Solis committed
661
662

#if defined (IMPROVE_GRID)
663
664
665
666
667
668
669
670
671
			mul_tmp = atom_typeid*g3;
			cube [0][0][0] = *(dockpars_fgrids + offset_cube_000 + mul_tmp);
			cube [1][0][0] = *(dockpars_fgrids + offset_cube_100 + mul_tmp);
      			cube [0][1][0] = *(dockpars_fgrids + offset_cube_010 + mul_tmp);
      			cube [1][1][0] = *(dockpars_fgrids + offset_cube_110 + mul_tmp);
      			cube [0][0][1] = *(dockpars_fgrids + offset_cube_001 + mul_tmp);
      			cube [1][0][1] = *(dockpars_fgrids + offset_cube_101 + mul_tmp);
      			cube [0][1][1] = *(dockpars_fgrids + offset_cube_011 + mul_tmp);
      			cube [1][1][1] = *(dockpars_fgrids + offset_cube_111 + mul_tmp);
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686

			// -------------------------------------------------------------------
			// Calculate gradients (forces) corresponding to 
			// "dsol" intermolecular energy
			// Derived from autodockdev/maps.py
			// -------------------------------------------------------------------

			if (*is_enabled_gradient_calc) {
				// vector in x-direction
				x10 = cube [1][0][0] - cube [0][0][0]; // z = 0
				x52 = cube [1][1][0] - cube [0][1][0]; // z = 0
				x43 = cube [1][0][1] - cube [0][0][1]; // z = 1
				x76 = cube [1][1][1] - cube [0][1][1]; // z = 1
				vx_z0 = (1 - dy) * x10 + dy * x52;     // z = 0
				vx_z1 = (1 - dy) * x43 + dy * x76;     // z = 1
687
				gradient_inter_x[atom_id] += (1 - dz) * vx_z0 + dz * vx_z1;
688
689
690
691
692
693
694
695

				// vector in y-direction
				y20 = cube[0][1][0] - cube [0][0][0];	// z = 0
				y51 = cube[1][1][0] - cube [1][0][0];	// z = 0
				y63 = cube[0][1][1] - cube [0][0][1];	// z = 1
				y74 = cube[1][1][1] - cube [1][0][1];	// z = 1
				vy_z0 = (1 - dx) * y20 + dx * y51;	// z = 0
				vy_z1 = (1 - dx) * y63 + dx * y74;	// z = 1
696
				gradient_inter_y[atom_id] += (1 - dz) * vy_z0 + dz * vy_z1;
697
698
699
700
701
702
703
704

				// vectors in z-direction
				z30 = cube [0][0][1] - cube [0][0][0];	// y = 0
				z41 = cube [1][0][1] - cube [1][0][0];	// y = 0
				z62 = cube [0][1][1] - cube [0][1][0];	// y = 1 
				z75 = cube [1][1][1] - cube [1][1][0];	// y = 1
				vz_y0 = (1 - dx) * z30 + dx * z41;	// y = 0
				vz_y1 = (1 - dx) * z62 + dx * z75;	// y = 1
705
				gradient_inter_z[atom_id] += (1 - dy) * vz_y0 + dy * vz_y1;
706
707
			}
			// -------------------------------------------------------------------
Leonardo Solis's avatar
Leonardo Solis committed
708
#else
709
710
711
712
713
714
715
716
717
			// -------------------------------------------------------------------
			// FIXME: this block within the "#else" preprocessor directive 
			// provides NO gradient corresponding to "dsol" intermolecular energy
			// -------------------------------------------------------------------

			cube [0][0][0] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
718
						      atom_typeid, z_low, y_low, x_low);
719
			cube [1][0][0] = GETGRIDVALUE(dockpars_fgridsu2,
720
721
722
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
723
						      atom_typeid, z_low, y_low, x_high);
724
			cube [0][1][0] = GETGRIDVALUE(dockpars_fgrids,
725
						      dockpars_gridsize_x,rotbonds_unit_vectors_const
726
727
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
728
						      atom_typeid, z_low, y_high, x_low);
729
730
731
732
			cube [1][1][0] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
733
						      atom_typeid, z_low, y_high, x_high);
734
735
736
737
			cube [0][0][1] = GETGRIDVALUE(dockpars_fgrids, 
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
738
						      atom_typeid, z_high, y_low, x_low);
739
740
741
742
			cube [1][0][1] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
743
						      atom_typeid, z_high, y_low, x_high);
744
745
746
747
			cube [0][1][1] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
748
						      atom_typeid, z_high, y_high, x_low);
749
750
751
752
			cube [1][1][1] = GETGRIDVALUE(dockpars_fgrids,
						      dockpars_gridsize_x,
						      dockpars_gridsize_y,
						      dockpars_gridsize_z,
753
						      atom_typeid, z_high, y_high, x_high);
Leonardo Solis's avatar
Leonardo Solis committed
754
755
756
757
758
759
#endif

			//calculating desolvation energy
			partial_energies[get_local_id(0)] += fabs(q) * TRILININTERPOL(cube, weights);
		}

760
	} // End atom_id for-loop (INTERMOLECULAR ENERGY)
Leonardo Solis's avatar
Leonardo Solis committed
761
762
763

	// In paper: intermolecular and internal energy calculation
	// are independent from each other, -> NO BARRIER NEEDED
764
  	// but require different operations,
Leonardo Solis's avatar
Leonardo Solis committed
765
766
767
768
769
	// thus, they can be executed only sequentially on the GPU.

	// ================================================
	// CALCULATE INTRAMOLECULAR ENERGY
	// ================================================
770
771
772
	for (uint contributor_counter = get_local_id(0);
	          contributor_counter < dockpars_num_of_intraE_contributors;
	          contributor_counter +=NUM_OF_THREADS_PER_BLOCK)
Leonardo Solis's avatar
Leonardo Solis committed
773
774
	{
		//getting atom IDs
775
776
		uint atom1_id = intraE_contributors_const[3*contributor_counter];
		uint atom2_id = intraE_contributors_const[3*contributor_counter+1];
Leonardo Solis's avatar
Leonardo Solis committed
777
778

		//calculating address of first atom's coordinates
779
780
781
		float subx = calc_coords_x[atom1_id];
		float suby = calc_coords_y[atom1_id];
		float subz = calc_coords_z[atom1_id];
Leonardo Solis's avatar
Leonardo Solis committed
782
783
784
785
786
787

		//calculating address of second atom's coordinates
		subx -= calc_coords_x[atom2_id];
		suby -= calc_coords_y[atom2_id];
		subz -= calc_coords_z[atom2_id];

788
		//calculating distance (atomic_distance)
Leonardo Solis's avatar
Leonardo Solis committed
789
#if defined (NATIVE_PRECISION)
790
		float atomic_distance = native_sqrt(subx*subx + suby*suby + subz*subz)*dockpars_grid_spacing;
Leonardo Solis's avatar
Leonardo Solis committed
791
#elif defined (HALF_PRECISION)
792
		float atomic_distance = half_sqrt(subx*subx + suby*suby + subz*subz)*dockpars_grid_spacing;
Leonardo Solis's avatar
Leonardo Solis committed
793
#else	// Full precision
794
		float atomic_distance = sqrt(subx*subx + suby*suby + subz*subz)*dockpars_grid_spacing;
Leonardo Solis's avatar
Leonardo Solis committed
795
796
#endif

797
798
		if (atomic_distance < 1.0f)
			atomic_distance = 1.0f;
Leonardo Solis's avatar
Leonardo Solis committed
799
800

		//calculating energy contributions
801
		if ((atomic_distance < 8.0f) && (atomic_distance < 20.48f))
Leonardo Solis's avatar
Leonardo Solis committed
802
		{
803
804
805
			// Getting type IDs
			uint atom1_typeid = atom_types_const[atom1_id];
			uint atom2_typeid = atom_types_const[atom2_id];
Leonardo Solis's avatar
Leonardo Solis committed
806
807
808

			//calculating van der Waals / hydrogen bond term
#if defined (NATIVE_PRECISION)
809
			partial_energies[get_local_id(0)] += native_divide(VWpars_AC_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],native_powr(atomic_distance,12));
Leonardo Solis's avatar
Leonardo Solis committed
810
#elif defined (HALF_PRECISION)
811
812
813
814
			pfor (uint rotation_counter = get_local_id(0);
	          rotation_counter < dockpars_rotbondlist_length;
	          rotation_counter+=NUM_OF_THREADS_PER_BLOCK)
	{artial_energies[get_local_id(0)] += half_divide(VWpars_AC_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],half_powr(atomic_distance,12));
Leonardo Solis's avatar
Leonardo Solis committed
815
#else	// Full precision
816
			partial_energies[get_local_id(0)] += VWpars_AC_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid]/powr(atomic_distance,12);
Leonardo Solis's avatar
Leonardo Solis committed
817
818
819
820
#endif

			if (intraE_contributors_const[3*contributor_counter+2] == 1)	//H-bond
#if defined (NATIVE_PRECISION)
821
				partial_energies[get_local_id(0)] -= native_divide(VWpars_BD_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],native_powr(atomic_distance,10));
Leonardo Solis's avatar
Leonardo Solis committed
822
#elif defined (HALF_PRECISION)
823
				partial_energies[get_local_id(0)] -= half_divide(VWpars_BD_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],half_powr(atomic_distance,10));
Leonardo Solis's avatar
Leonardo Solis committed
824
#else	// Full precision
825
				partial_energies[get_local_id(0)] -= VWpars_BD_const[atom1_typeid*dockpars_num_of_atypes+atom2_typeid]/powr(atomic_distance,10);
Leonardo Solis's avatar
Leonardo Solis committed
826
827
828
829
#endif

			else	//van der Waals
#if defined (NATIVE_PRECISION)
830
				partial_energies[get_local_id(0)] -= native_divide(VWpars_BD_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],native_powr(atomic_distance,6));
Leonardo Solis's avatar
Leonardo Solis committed
831
#elif defined (HALF_PRECISION)
832
				partial_energies[get_local_id(0)] -= half_divide(VWpars_BD_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],half_powr(atomic_distance,6));
Leonardo Solis's avatar
Leonardo Solis committed
833
#else	// Full precision
834
				partial_energies[get_local_id(0)] -= VWpars_BD_const[atom1_typeid*dockpars_num_of_atypes+atom2_typeid]/powr(atomic_distance,6);
Leonardo Solis's avatar
Leonardo Solis committed
835
836
837
838
839
840
#endif

			//calculating electrostatic term
#if defined (NATIVE_PRECISION)
        partial_energies[get_local_id(0)] += native_divide (
                                                             dockpars_coeff_elec * atom_charges_const[atom1_id] * atom_charges_const[atom2_id],
841
                                                             atomic_distance * (-8.5525f + native_divide(86.9525f,(1.0f + 7.7839f*native_exp(-0.3154f*atomic_distance))))
Leonardo Solis's avatar
Leonardo Solis committed
842
843
844
845
                                                             );
#elif defined (HALF_PRECISION)
        partial_energies[get_local_id(0)] += half_divide (
                                                             dockpars_coeff_elec * atom_charges_const[atom1_id] * atom_charges_const[atom2_id],
846
                                                             atomic_distance * (-8.5525f + half_divide(86.9525f,(1.0f + 7.7839f*half_exp(-0.3154f*atomic_distance))))
Leonardo Solis's avatar
Leonardo Solis committed
847
848
849
                                                             );
#else	// Full precision
				partial_energies[get_local_id(0)] += dockpars_coeff_elec*atom_charges_const[atom1_id]*atom_charges_const[atom2_id]/
850
			                                       (atomic_distance*(-8.5525f + 86.9525f/(1.0f + 7.7839f*exp(-0.3154f*atomic_distance))));
Leonardo Solis's avatar
Leonardo Solis committed
851
852
853
854
855
#endif

			//calculating desolvation term
#if defined (NATIVE_PRECISION)
			partial_energies[get_local_id(0)] += ((dspars_S_const[atom1_typeid] +
856
857
858
859
							       dockpars_qasp*fabs(atom_charges_const[atom1_id]))*dspars_V_const[atom2_typeid] +
					                       (dspars_S_const[atom2_typeid] +
							       dockpars_qasp*fabs(atom_charges_const[atom2_id]))*dspars_V_const[atom1_typeid]) *
					                       dockpars_coeff_desolv*native_exp(-atomic_distance*native_divide(atomic_distance,25.92f));
Leonardo Solis's avatar
Leonardo Solis committed
860
861
#elif defined (HALF_PRECISION)
			partial_energies[get_local_id(0)] += ((dspars_S_const[atom1_typeid] +
862
863
864
865
							       dockpars_qasp*fabs(atom_charges_const[atom1_id]))*dspars_V_const[atom2_typeid] +
					                       (dspars_S_const[atom2_typeid] +
							       dockpars_qasp*fabs(atom_charges_const[atom2_id]))*dspars_V_const[atom1_typeid]) *
					                       dockpars_coeff_desolv*half_exp(-atomic_distance*half_divide(atomic_distance,25.92f));
Leonardo Solis's avatar
Leonardo Solis committed
866
867
#else	// Full precision
			partial_energies[get_local_id(0)] += ((dspars_S_const[atom1_typeid] +
868
869
870
871
							       dockpars_qasp*fabs(atom_charges_const[atom1_id]))*dspars_V_const[atom2_typeid] +
					                       (dspars_S_const[atom2_typeid] +
							       dockpars_qasp*fabs(atom_charges_const[atom2_id]))*dspars_V_const[atom1_typeid]) *
					                       dockpars_coeff_desolv*exp(-atomic_distance*atomic_distance/25.92f);
Leonardo Solis's avatar
Leonardo Solis committed
872
873
874
#endif

		}
875
876
877
878
879
880
881
882
883
	} // End contributor_counter for-loop (INTRAMOLECULAR ENERGY)








Leonardo Solis's avatar
Leonardo Solis committed
884
885
886
887
888
889
890

	barrier(CLK_LOCAL_MEM_FENCE);

	if (get_local_id(0) == 0)
	{
		*energy = partial_energies[0];

891
892
893
		for (uint contributor_counter=1;
		          contributor_counter<NUM_OF_THREADS_PER_BLOCK;
		          contributor_counter++)
Leonardo Solis's avatar
Leonardo Solis committed
894
895
896
897
		{
			*energy += partial_energies[contributor_counter];
		}
	}
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917

	// -------------------------------------------------------------------
	// Calculate gradients (forces) corresponding to (interE + intraE)
	// Derived from autodockdev/motions.py/forces_to_delta()
	// -------------------------------------------------------------------
	
	// Could be barrier removed if another work-item is used? 
	// (e.g. get_locla_id(0) == 1)
	barrier(CLK_LOCAL_MEM_FENCE);

	// FIXME: done so far only for interE
	if (get_local_id(0) == 0) {
		if (*is_enabled_gradient_calc) {
			gradient_genotype [0] = 0.0f;
			gradient_genotype [1] = 0.0f;
			gradient_genotype [2] = 0.0f;
		
			// ------------------------------------------
			// translation-related gradients
			// ------------------------------------------
918
919
920
			for (uint lig_atom_id = 0;
				  lig_atom_id<dockpars_num_of_atoms;
				  lig_atom_id++) {
921
922
923
924
925
926
927
928
				gradient_genotype [0] += gradient_inter_x[lig_atom_id]; // gradient for gene 0: gene x
				gradient_genotype [1] += gradient_inter_y[lig_atom_id]; // gradient for gene 1: gene y
				gradient_genotype [2] += gradient_inter_z[lig_atom_id]; // gradient for gene 2: gene z
			}

			// ------------------------------------------
			// rotation-related gradients 
			// ------------------------------------------
929
			float3 torque_rot = (float3)(0.0f, 0.0f, 0.0f);
930
931
932
933
934
935
936
937
938
939
940
941
942

			// center of rotation 
			// In getparameters.cpp, it indicates 
			// translation genes are in grid spacing (instead of Angstroms)
			float about[3];
			about[0] = genotype[0]; 
			about[1] = genotype[1];
			about[2] = genotype[2];
		
			// Temporal variable to calculate translation differences.
			// They are converted back to Angstroms here
			float3 r;
			
943
944
945
			for (uint lig_atom_id = 0;
				  lig_atom_id<dockpars_num_of_atoms;
				  lig_atom_id++) {
946
947
948
				r.x = (calc_coords_x[lig_atom_id] - about[0]) * dockpars_grid_spacing; 
				r.y = (calc_coords_y[lig_atom_id] - about[1]) * dockpars_grid_spacing;  
				r.z = (calc_coords_z[lig_atom_id] - about[2]) * dockpars_grid_spacing; 
949
				torque_rot += cross(r, torque_rot);
950
951
952
953
954
955
956
957
958
959
			}

			const float rad = 1E-8;
			const float rad_div_2 = native_divide(rad, 2);

			
			float quat_w, quat_x, quat_y, quat_z;

			// Derived from rotation.py/axisangle_to_q()
			// genes[3:7] = rotation.axisangle_to_q(torque, rad)
960
961
962
963
			torque_rot = fast_normalize(torque_rot);
			quat_x = torque_rot.x;
			quat_y = torque_rot.y;
			quat_z = torque_rot.z;
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982

			// rotation-related gradients are expressed here in quaternions
			quat_w = native_cos(rad_div_2);
			quat_x = quat_x * native_sin(rad_div_2);
			quat_y = quat_y * native_sin(rad_div_2);
			quat_z = quat_z * native_sin(rad_div_2);

			// convert quaternion gradients into Shoemake gradients 
			// Derived from autodockdev/motion.py/_get_cube3_gradient

			// where we are in cube3
			float current_u1, current_u2, current_u3;
			current_u1 = genotype[3]; // check very initial input Shoemake genes
			current_u2 = genotype[4];
			current_u3 = genotype[5];

			// where we are in quaternion space
			// current_q = cube3_to_quaternion(current_u)
			float current_qw, current_qx, current_qy, current_qz;
983
984
985
986
			current_qw = native_sqrt(1-current_u1) * native_sin(PI_TIMES_2*current_u2);
			current_qx = native_sqrt(1-current_u1) * native_cos(PI_TIMES_2*current_u2);
			current_qy = native_sqrt(current_u1)   * native_sin(PI_TIMES_2*current_u3);
			current_qz = native_sqrt(current_u1)   * native_cos(PI_TIMES_2*current_u3);
987
988
989
990
991
992
993
994
995
996
997
998
999
1000

			// where we want to be in quaternion space
			float target_qw, target_qx, target_qy, target_qz;

			// target_q = rotation.q_mult(q, current_q)
			// Derived from autodockdev/rotation.py/q_mult()
			// In our terms means q_mult(quat_{w|x|y|z}, current_q{w|x|y|z})
			target_qw = quat_w*current_qw - quat_x*current_qx - quat_y*current_qy - quat_z*current_qz;// w
			target_qx = quat_w*current_qx + quat_x*current_qw + quat_y*current_qz - quat_z*current_qy;// x
			target_qy = quat_w*current_qy + quat_y*current_qw + quat_z*current_qx - quat_x*current_qz;// y
			target_qz = quat_w*current_qz + quat_z*current_qw + quat_x*current_qy - quat_y*current_qx;// z

			// where we want ot be in cube3
			float target_u1, target_u2, target_u3;