kernel_gradient.cl 26.9 KB
Newer Older
1
2
// Gradient-based steepest descent minimizer
// Alternative to Solis-Wetts
3

lvs's avatar
lvs committed
4
5
6
7
8
//#define DEBUG_ENERGY_KERNEL5
	//#define PRINT_ENERGIES
	//#define PRINT_GENES_AND_GRADS
	//#define PRINT_ATOMIC_COORDS

lvs's avatar
lvs committed
9
10
11
// Enable DEBUG_MINIMIZER for a seeing a detailed SD evolution
// If only PRINT_MINIMIZER_ENERGY_EVOLUTION is enabled,
// then a only a simplified SD evolution will be shown
12
//#define DEBUG_MINIMIZER
lvs's avatar
lvs committed
13
//	#define PRINT_MINIMIZER_ENERGY_EVOLUTION
14

15
// Enable this for debugging SD from a defined initial genotype
lvs's avatar
lvs committed
16
//#define DEBUG_INITIAL_2BRT
17

18
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
19
20
gradient_minimizer(	
			char   dockpars_num_of_atoms,
21
22
23
24
25
			char   dockpars_num_of_atypes,
			int    dockpars_num_of_intraE_contributors,
			char   dockpars_gridsize_x,
			char   dockpars_gridsize_y,
			char   dockpars_gridsize_z,
26
27
28
							    		// g1 = gridsize_x
			uint   dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
			uint   dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
29
			float  dockpars_grid_spacing,
30
	 __global const float* restrict dockpars_fgrids, // This is too large to be allocated in __constant 
31
32
33
			int    dockpars_rotbondlist_length,
			float  dockpars_coeff_elec,
			float  dockpars_coeff_desolv,
34
35
	  __global      float* restrict dockpars_conformations_next,
	  __global      float* restrict dockpars_energies_next,
36
  	  __global 	int*   restrict dockpars_evals_of_new_entities,
37
	  __global      uint*  restrict dockpars_prng_states,
38
39
40
			int    dockpars_pop_size,
			int    dockpars_num_of_genes,
			float  dockpars_lsearch_rate,
41
			uint   dockpars_num_of_lsentities,
lvs's avatar
lvs committed
42
			uint   dockpars_max_num_of_iters,
43
			float  dockpars_qasp,
44
45
46
	     __constant float* atom_charges_const,
    	     __constant char*  atom_types_const,
	     __constant char*  intraE_contributors_const,
lvs's avatar
lvs committed
47
48
49
                        float  dockpars_smooth,
 	     __constant float* reqm,
	     __constant float* reqm_hbond,
lvs's avatar
lvs committed
50
51
	     __constant uint*  atom1_types_reqm,
	     __constant uint*  atom2_types_reqm,
52
53
54
55
56
57
58
59
60
61
62
    	     __constant float* VWpars_AC_const,
    	     __constant float* VWpars_BD_const,
             __constant float* dspars_S_const,
             __constant float* dspars_V_const,
             __constant int*   rotlist_const,
    	     __constant float* ref_coords_x_const,
    	     __constant float* ref_coords_y_const,
             __constant float* ref_coords_z_const,
    	     __constant float* rotbonds_moving_vectors_const,
             __constant float* rotbonds_unit_vectors_const,
             __constant float* ref_orientation_quats_const,
63
64
	     __constant int*   rotbonds_const,
	     __constant int*   rotbonds_atoms_const,
lvs's avatar
lvs committed
65
	     __constant int*   num_rotating_atoms_per_rotbond_const
66
67
68
69
			,
	     __constant float* angle_const,
	     __constant float* dependence_on_theta_const,
	     __constant float* dependence_on_rotangle_const
70
71
72
73
74
75
76
77
78
)
//The GPU global function performs gradient-based minimization on (some) entities of conformations_next.
//The number of OpenCL compute units (CU) which should be started equals to num_of_minEntities*num_of_runs.
//This way the first num_of_lsentities entity of each population will be subjected to local search
//(and each CU carries out the algorithm for one entity).
//Since the first entity is always the best one in the current population,
//it is always tested according to the ls probability, and if it not to be
//subjected to local search, the entity with ID num_of_lsentities is selected instead of the first one (with ID 0).
{
79
	// -----------------------------------------------------------------------------
80
81
82
	// -----------------------------------------------------------------------------
	// -----------------------------------------------------------------------------

83
	// Determining entity, and its run, energy, and genotype
84
85
	__local int   entity_id;
	__local int   run_id;
Leonardo Solis's avatar
Leonardo Solis committed
86
87
88
  	__local float energy;
	__local float genotype[ACTUAL_GENOTYPE_LENGTH];

lvs's avatar
lvs committed
89
90
	// Iteration counter fot the minimizer
  	__local uint iteration_cnt;  	
91

Leonardo Solis's avatar
Leonardo Solis committed
92
93
94
	// Stepsize for the minimizer
	__local float stepsize;

95
96
	if (get_local_id(0) == 0)
	{
97
		// Choosing a random entity out of the entire population
lvs's avatar
lvs committed
98
		/*
99
		run_id = get_group_id(0);
Leonardo Solis's avatar
Leonardo Solis committed
100
101
		//entity_id = (uint)(dockpars_pop_size * gpu_randf(dockpars_prng_states));
		entity_id = 0;
lvs's avatar
lvs committed
102
		*/
103

104
105
		run_id = get_group_id(0) / dockpars_num_of_lsentities;
		entity_id = get_group_id(0) % dockpars_num_of_lsentities;
106
107
108
109
110
111
112
113
114
115

		// Since entity 0 is the best one due to elitism,
		// it should be subjected to random selection
		if (entity_id == 0) {
			// If entity 0 is not selected according to LS-rate,
			// choosing an other entity
			if (100.0f*gpu_randf(dockpars_prng_states) > dockpars_lsearch_rate) {
				entity_id = dockpars_num_of_lsentities;					
			}
		}
116
		
Leonardo Solis's avatar
Leonardo Solis committed
117
		energy = dockpars_energies_next[run_id*dockpars_pop_size+entity_id];
118

Leonardo Solis's avatar
Leonardo Solis committed
119
120
		// Initializing gradient-minimizer counters and flags
    		iteration_cnt  = 0;
Leonardo Solis's avatar
Leonardo Solis committed
121
		stepsize       = STEP_START;
122
123

		#if defined (DEBUG_MINIMIZER) || defined (PRINT_MINIMIZER_ENERGY_EVOLUTION)
124
125
126
127
128
129
130
131
		printf("\n");
		printf("-------> Start of SD minimization cycle\n");
		printf("%20s %6u\n", "run_id: ", run_id);
		printf("%20s %6u\n", "entity_id: ", entity_id);
		printf("\n");
		printf("%20s \n", "LGA genotype: ");
		printf("%20s %.6f\n", "initial energy: ", energy);
		printf("%20s %.6f\n", "initial stepsize: ", stepsize);
132
		#endif
133
134
	}

135
136
	barrier(CLK_LOCAL_MEM_FENCE);

lvs's avatar
lvs committed
137
138
139
  	event_t ev = async_work_group_copy(genotype,
  			      		   dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
                              		   dockpars_num_of_genes, 0);
140

141
142
143
	// Asynchronous copy should be finished by here
	wait_group_events(1,&ev);

144
  	// -----------------------------------------------------------------------------
145
146
	// -----------------------------------------------------------------------------
	// -----------------------------------------------------------------------------
Leonardo Solis's avatar
Leonardo Solis committed
147
	           
148
	// Partial results of the gradient step
Leonardo Solis's avatar
Leonardo Solis committed
149
150
151
	__local float gradient          [ACTUAL_GENOTYPE_LENGTH];
	__local float candidate_energy;
	__local float candidate_genotype[ACTUAL_GENOTYPE_LENGTH];
152
153
154
155
156

	// -------------------------------------------------------------------
	// Calculate gradients (forces) for intermolecular energy
	// Derived from autodockdev/maps.py
	// -------------------------------------------------------------------
157
	// Gradient of the intermolecular energy per each ligand atom
lvs's avatar
lvs committed
158
	// Also used to store the accummulated gradient per each ligand atom
159
160
161
	__local float gradient_inter_x[MAX_NUM_OF_ATOMS];
	__local float gradient_inter_y[MAX_NUM_OF_ATOMS];
	__local float gradient_inter_z[MAX_NUM_OF_ATOMS];
162

163
	// Gradient of the intramolecular energy per each ligand atom
164
165
166
	__local float gradient_intra_x[MAX_NUM_OF_ATOMS];
	__local float gradient_intra_y[MAX_NUM_OF_ATOMS];
	__local float gradient_intra_z[MAX_NUM_OF_ATOMS];
167

168
	// Ligand-atom position and partial energies
169
170
171
172
173
	__local float calc_coords_x[MAX_NUM_OF_ATOMS];
	__local float calc_coords_y[MAX_NUM_OF_ATOMS];
	__local float calc_coords_z[MAX_NUM_OF_ATOMS];
	__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];

lvs's avatar
lvs committed
174
	#if defined (DEBUG_ENERGY_KERNEL)
175
176
	__local float partial_interE[NUM_OF_THREADS_PER_BLOCK];
	__local float partial_intraE[NUM_OF_THREADS_PER_BLOCK];
177
	#endif
178

179
	// Enable this for debugging SD from a defined initial genotype
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
	#if defined (DEBUG_INITIAL_2BRT)
	// 2brt
	genotype[0] = 24.093334;
	genotype[1] = 24.658667;
	genotype[2] = 24.210667;
	genotype[3] = 50.0;
	genotype[4] = 50.0;
	genotype[5] = 50.0;
	genotype[6] = 0.0f;
	genotype[7] = 0.0f;
	genotype[8] = 0.0f;
	genotype[9] = 0.0f;
	genotype[10] = 0.0f;
	genotype[11] = 0.0f;
	genotype[12] = 0.0f;
	genotype[13] = 0.0f;
	genotype[14] = 0.0f;
	genotype[15] = 0.0f;
	genotype[16] = 0.0f;
	genotype[17] = 0.0f;
	genotype[18] = 0.0f;
	genotype[19] = 0.0f;
	genotype[20] = 0.0f;

	// Evaluating candidate
	barrier(CLK_LOCAL_MEM_FENCE);

	// =============================================================
	gpu_calc_energy(dockpars_rotbondlist_length,
			dockpars_num_of_atoms,
			dockpars_gridsize_x,
			dockpars_gridsize_y,
			dockpars_gridsize_z,
							    	// g1 = gridsize_x
			dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
			dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
			dockpars_fgrids,
			dockpars_num_of_atypes,
			dockpars_num_of_intraE_contributors,
			dockpars_grid_spacing,
			dockpars_coeff_elec,
			dockpars_qasp,
			dockpars_coeff_desolv,
lvs's avatar
lvs committed
223
			genotype, /*WARNING: calculating the energy of the hardcoded genotype*/
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
			&energy,
			&run_id,
			// Some OpenCL compilers don't allow declaring 
			// local variables within non-kernel functions.
			// These local variables must be declared in a kernel, 
			// and then passed to non-kernel functions.
			calc_coords_x,
			calc_coords_y,
			calc_coords_z,
			partial_energies,
			#if defined (DEBUG_ENERGY_KERNEL)
			partial_interE,
			partial_intraE,
			#endif

			atom_charges_const,
			atom_types_const,
			intraE_contributors_const,
#if 0
			true,
#endif
			dockpars_smooth,
			reqm,
			reqm_hbond,
	     	        atom1_types_reqm,
	     	        atom2_types_reqm,
			VWpars_AC_const,
			VWpars_BD_const,
			dspars_S_const,
			dspars_V_const,
			rotlist_const,
			ref_coords_x_const,
			ref_coords_y_const,
			ref_coords_z_const,
			rotbonds_moving_vectors_const,
			rotbonds_unit_vectors_const,
			ref_orientation_quats_const
			);
	// =============================================================

264
265
266
	// WARNING: hardcoded has priority over LGA genotype.
	// That means, if DEBUG_INITIAL_2BRT is defined, then
	// LGA genotype is not used (only for debugging purposes)
267
268
	if (get_local_id(0) == 0)
	{
269
270
271
272
		printf("\n");
		printf("%20s \n", "hardcoded genotype: ");
		printf("%20s %.6f\n", "initial energy: ", energy);
		printf("%20s %.6f\n\n", "initial stepsize: ", stepsize);		
273
274
275
276
	}
	barrier(CLK_LOCAL_MEM_FENCE);
	#endif

277
	// Perform gradient-descent iterations
278

279
	#if 0
Leonardo Solis's avatar
Leonardo Solis committed
280
281
282
283
284
285
286
287
288
289
	// 7cpa
	float grid_center_x = 49.836;
	float grid_center_y = 17.609;
	float grid_center_z = 36.272;
	float ligand_center_x = 49.2216976744186;
	float ligand_center_y = 17.793953488372097;
	float ligand_center_z = 36.503837209302326;
	float shoemake_gene_u1 = 0.02;
	float shoemake_gene_u2 = 0.23;
	float shoemake_gene_u3 = 0.95;
290
	#endif
Leonardo Solis's avatar
Leonardo Solis committed
291

292
293
294
295
296
297
298
299
300
	#if 0
	// 3tmn
	float grid_center_x = 52.340;
	float grid_center_y = 15.029;
	float grid_center_z = -2.932;
	float ligand_center_x = 52.22740741;
	float ligand_center_y = 15.51751852;
	float ligand_center_z = -2.40896296;
	#endif
301

lvs's avatar
lvs committed
302
	// Calculating maximum possible stepsize (alpha)
lvs's avatar
lvs committed
303
	__local float max_trans_grad, max_rota_grad, max_tors_grad;
lvs's avatar
lvs committed
304
305
306
	__local float max_trans_stepsize, max_rota_stepsize, max_tors_stepsize;
	__local float max_stepsize;

lvs's avatar
lvs committed
307
308
	// Storing torsion gradients here
	__local float torsions_gradient[ACTUAL_GENOTYPE_LENGTH];
lvs's avatar
lvs committed
309

Leonardo Solis's avatar
Leonardo Solis committed
310
311
312
	// The termination criteria is based on 
	// a maximum number of iterations, and
	// the minimum step size allowed for single-floating point numbers 
lvs's avatar
lvs committed
313
	// (IEEE-754 single float has a precision of about 6 decimal digits)
314
	do {
315
		#if 0
316
		// Specific input genotypes for a ligand with no rotatable bonds (1ac8).
Leonardo Solis's avatar
Leonardo Solis committed
317
		// Translation genes must be expressed in grids in OCLADock (genotype [0|1|2]).
318
319
		// However, for testing purposes, 
		// we start using translation values in real space (Angstrom): {31.79575, 93.743875, 47.699875}
Leonardo Solis's avatar
Leonardo Solis committed
320
		// Rotation genes are expresed in the Shoemake space: genotype [3|4|5]
321
		// xyz_gene_gridspace = gridcenter_gridspace + (input_gene_realspace - gridcenter_realspace)/gridsize
322
323

		// 1ac8				
Leonardo Solis's avatar
Leonardo Solis committed
324
325
326
327
328
329
		genotype[0] = 30 + (31.79575  - 31.924) / 0.375;
		genotype[1] = 30 + (93.743875 - 93.444) / 0.375;
		genotype[2] = 30 + (47.699875 - 47.924) / 0.375;
		genotype[3] = 0.1f;
		genotype[4] = 0.5f;
		genotype[5] = 0.9f;
330
		#endif
331

332
		#if 0
333
		// 3tmn
Leonardo Solis's avatar
Leonardo Solis committed
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
		genotype[0] = 30 + (ligand_center_x - grid_center_x) / 0.375;
		genotype[1] = 30 + (ligand_center_y - grid_center_y) / 0.375;
		genotype[2] = 30 + (ligand_center_z - grid_center_z) / 0.375;
		genotype[3] = shoemake_gene_u1;
		genotype[4] = shoemake_gene_u2;
		genotype[5] = shoemake_gene_u3;
		genotype[6] = 0.0f;
		genotype[7] = 0.0f;
		genotype[8] = 0.0f;
		genotype[9] = 0.0f;
		genotype[10] = 0.0f;
		genotype[11] = 0.0f;
		genotype[12] = 0.0f;
		genotype[13] = 0.0f;
		genotype[14] = 0.0f;
		genotype[15] = 0.0f;
		genotype[16] = 0.0f;
		genotype[17] = 0.0f;
		genotype[18] = 0.0f;
		genotype[19] = 0.0f;
		genotype[20] = 0.0f;
355
		#endif
356

357
358
		#if 0
		// 2j5s
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
		genotype[0] = 28.464;
		genotype[1] = 25.792762;
		genotype[2] = 23.740571;
		genotype[3] = 50.0;
		genotype[4] = 50.0;
		genotype[5] = 50.0;
		genotype[6] = 0.0f;
		genotype[7] = 0.0f;
		genotype[8] = 0.0f;
		genotype[9] = 0.0f;
		genotype[10] = 0.0f;
		genotype[11] = 0.0f;
		genotype[12] = 0.0f;
		genotype[13] = 0.0f;
		genotype[14] = 0.0f;
		genotype[15] = 0.0f;
		genotype[16] = 0.0f;
		genotype[17] = 0.0f;
		genotype[18] = 0.0f;
		genotype[19] = 0.0f;
		genotype[20] = 0.0f;
380
381
		#endif

lvs's avatar
lvs committed
382
		#if 0
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
		// 2brt
		genotype[0] = 24.093334;
		genotype[1] = 24.658667;
		genotype[2] = 24.210667;
		genotype[3] = 50.0;
		genotype[4] = 50.0;
		genotype[5] = 50.0;
		genotype[6] = 0.0f;
		genotype[7] = 0.0f;
		genotype[8] = 0.0f;
		genotype[9] = 0.0f;
		genotype[10] = 0.0f;
		genotype[11] = 0.0f;
		genotype[12] = 0.0f;
		genotype[13] = 0.0f;
		genotype[14] = 0.0f;
		genotype[15] = 0.0f;
		genotype[16] = 0.0f;
		genotype[17] = 0.0f;
		genotype[18] = 0.0f;
		genotype[19] = 0.0f;
		genotype[20] = 0.0f;
		#endif
406

407
408
409
410
411
412
413
414
415
416
417
418
419
		// Printing number of stepest-descent iterations
		#if defined (DEBUG_MINIMIZER) 
		if (get_local_id(0) == 0) {
			printf("%s\n", "----------------------------------------------------------");	
		}
		#endif
		
		#if defined (DEBUG_MINIMIZER) || defined (PRINT_MINIMIZER_ENERGY_EVOLUTION)
		if (get_local_id(0) == 0) {
			printf("%-15s %-3u ", "# SD iteration: ", iteration_cnt);
		}
		#endif

420
		// Calculating gradient
Leonardo Solis's avatar
Leonardo Solis committed
421
422
		barrier(CLK_LOCAL_MEM_FENCE);

423
424
425
426
427
428
429
		// =============================================================
		gpu_calc_gradient(
				dockpars_rotbondlist_length,
				dockpars_num_of_atoms,
				dockpars_gridsize_x,
				dockpars_gridsize_y,
				dockpars_gridsize_z,
430
431
432
								    	// g1 = gridsize_x
				dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
				dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
433
434
435
436
437
438
439
440
441
442
443
				dockpars_fgrids,
				dockpars_num_of_atypes,
				dockpars_num_of_intraE_contributors,
				dockpars_grid_spacing,
				dockpars_coeff_elec,
				dockpars_qasp,
				dockpars_coeff_desolv,
				// Some OpenCL compilers don't allow declaring 
				// local variables within non-kernel functions.
				// These local variables must be declared in a kernel, 
				// and then passed to non-kernel functions.
Leonardo Solis's avatar
Leonardo Solis committed
444
445
				genotype,
				&energy,
Leonardo Solis's avatar
Leonardo Solis committed
446
447
				&run_id,

448
449
450
451
452
453
454
				calc_coords_x,
				calc_coords_y,
				calc_coords_z,

			        atom_charges_const,
				atom_types_const,
				intraE_contributors_const,
lvs's avatar
lvs committed
455
456
457
				dockpars_smooth,
				reqm,
				reqm_hbond,
lvs's avatar
lvs committed
458
459
		     	        atom1_types_reqm,
		     	        atom2_types_reqm,
460
461
462
463
464
465
466
467
468
469
				VWpars_AC_const,
				VWpars_BD_const,
				dspars_S_const,
				dspars_V_const,
				rotlist_const,
				ref_coords_x_const,
				ref_coords_y_const,
				ref_coords_z_const,
				rotbonds_moving_vectors_const,
				rotbonds_unit_vectors_const,
470
471
472
473
				ref_orientation_quats_const,
				rotbonds_const,
				rotbonds_atoms_const,
				num_rotating_atoms_per_rotbond_const
474
475
476
477
				,
	     			angle_const,
	     			dependence_on_theta_const,
	     			dependence_on_rotangle_const
478
479
480
481
482
483
484
485
			 	// Gradient-related arguments
			 	// Calculate gradients (forces) for intermolecular energy
			 	// Derived from autodockdev/maps.py
				,
				dockpars_num_of_genes,
				gradient_inter_x,
				gradient_inter_y,
				gradient_inter_z,
486
487
488
				gradient_intra_x,
				gradient_intra_y,
				gradient_intra_z,
Leonardo Solis's avatar
Leonardo Solis committed
489
				gradient
490
491
				);
		// =============================================================
492

493
494
495
		// This could be enabled back for double checking
		#if 0
		#if defined (DEBUG_ENERGY_KERNEL5)	
lvs's avatar
lvs committed
496
		if (/*(get_group_id(0) == 0) &&*/ (get_local_id(0) == 0)) {
497
498
		
			#if defined (PRINT_GENES_AND_GRADS)
lvs's avatar
lvs committed
499
			for(uint i = 0; i < dockpars_num_of_genes; i++) {
500
501
502
503
504
				if (i == 0) {
					printf("\n%s\n", "----------------------------------------------------------");
					printf("%13s %13s %5s %15s %15s\n", "gene_id", "gene.value", "|", "gene.grad", "(autodockdevpy units)");
				}
				printf("%13u %13.6f %5s %15.6f %15.6f\n", i, genotype[i], "|", gradient[i], (i<3)? (gradient[i]/0.375f):(gradient[i]*180.0f/PI_FLOAT));
lvs's avatar
lvs committed
505
			}
506
			#endif
507

508
			#if defined (PRINT_ATOMIC_COORDS)
509
510
511
			for(uint i = 0; i < dockpars_num_of_atoms; i++) {
				if (i == 0) {
					printf("\n%s\n", "----------------------------------------------------------");
512
					printf("%s\n", "Coordinates calculated by calcgradient.cl");
lvs's avatar
lvs committed
513
					printf("%12s %12s %12s %12s\n", "atom_id", "coords.x", "coords.y", "coords.z");
514
				}
lvs's avatar
lvs committed
515
				printf("%12u %12.6f %12.6f %12.6f\n", i, calc_coords_x[i], calc_coords_y[i], calc_coords_z[i]);
516
517
			}
			printf("\n");
518
			#endif
lvs's avatar
lvs committed
519
		}
520
521
		#endif
		#endif
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551

		if (get_local_id(0) == 0) {
			// Finding maximum of the absolute value for the three translation gradients
			max_trans_grad = fmax(fabs(gradient[0]), fabs(gradient[1]));
			max_trans_grad = fmax(max_trans_grad, fabs(gradient[2]));

			// MAX_DEV_TRANSLATION needs to be expressed in grid size first
			max_trans_stepsize = native_divide(native_divide(MAX_DEV_TRANSLATION, dockpars_grid_spacing), max_trans_grad);

			// Finding maximum of the absolute value for the three rotation gradients
			max_rota_grad = fmax(fabs(gradient[3]), fabs(gradient[4]));	
			max_rota_grad = fmax(max_rota_grad, fabs(gradient[5]));	

			// Note that MAX_DEV_ROTATION is already expressed approprietly
			max_rota_stepsize = native_divide(MAX_DEV_ROTATION, max_rota_grad);
		}

		// Copying torsions genes
		for(uint i = get_local_id(0); 
			 i < dockpars_num_of_genes-6; 
			 i+= NUM_OF_THREADS_PER_BLOCK) {
			torsions_gradient[i] = fabs(gradient[i+6]);
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		// Calculating maximum absolute torsional gene
		// https://stackoverflow.com/questions/36465581/opencl-find-max-in-array
		for (uint i=(dockpars_num_of_genes-6)/2; i>=1; i/=2){
			if (get_local_id(0) < i) {

552
			// This could be enabled back for details
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
			#if 0
			#if defined (DEBUG_MINIMIZER)
			printf("---====--- %u %u %10.10f %-0.10f\n", i, get_local_id(0), torsions_gradient[get_local_id(0)], torsions_gradient[get_local_id(0) + i]);
			#endif
			#endif

				if (torsions_gradient[get_local_id(0)] < torsions_gradient[get_local_id(0) + i]) {
					torsions_gradient[get_local_id(0)] = torsions_gradient[get_local_id(0) + i];
				}
			}
			barrier(CLK_LOCAL_MEM_FENCE);
		}
		if (get_local_id(0) == 0) {
			max_tors_grad = torsions_gradient[get_local_id(0)];
			max_tors_stepsize = native_divide(MAX_DEV_TORSION, max_tors_grad);
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		if (get_local_id(0) == 0) {
			// Calculating the maximum stepsize using previous three
			max_stepsize = fmin(max_trans_stepsize, max_rota_stepsize);
			max_stepsize = fmin(max_stepsize, max_tors_stepsize);

			// Capping the stepsize
			stepsize = fmin(stepsize, max_stepsize);
		}
579
		barrier(CLK_LOCAL_MEM_FENCE);
580
581
		
		for(uint i = get_local_id(0); i < dockpars_num_of_genes; i+= NUM_OF_THREADS_PER_BLOCK) {
582
	     		// Taking step
Leonardo Solis's avatar
Leonardo Solis committed
583
			candidate_genotype[i] = genotype[i] - stepsize * gradient[i];	
584
585

			#if defined (DEBUG_MINIMIZER)
lvs's avatar
lvs committed
586
587
			if (i == 0) {
				printf("\n%s\n", "After calculating gradients:");
588
				printf("%13s %13s %5s %15s %5s %20s\n", "gene_id", "gene", "|", "grad", "|", "cand.gene");
lvs's avatar
lvs committed
589
590
			}
			printf("%13u %13.6f %5s %15.6f %5s %18.6f\n", i, genotype[i], "|", gradient[i], "|", candidate_genotype[i]);
591

592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
			if (i == 0) {
				// This could be enabled back for double checking
				#if 0
				for(uint i = 0; i < dockpars_num_of_genes; i++) {
					if (i == 0) {
						printf("\n%s\n", "----------------------------------------------------------");
						printf("\n%s\n", "After calculating gradients:");
						printf("%13s %13s %5s %15s %20s\n", "gene_id", "gene", "|", "grad", " grad (devpy units)");
					}
					printf("%13u %13.6f %5s %15.6f %18.6f\n", i, genotype[i], "|", gradient[i], (i<3)? (gradient[i]/0.375f):(gradient[i]*180.0f/PI_FLOAT));
				}
				#endif

				printf("\n");
				printf("%20s %10.6f\n", "max_trans_grad: ", max_trans_grad);
				printf("%20s %10.6f\n", "max_rota_grad: ", max_rota_grad);
				printf("%20s %10.6f\n", "max_tors_grad: ", max_tors_grad);

				printf("\n");
				printf("%20s %10.6f\n", "max_trans_stepsize: ", max_trans_stepsize);
				printf("%20s %10.6f\n", "max_rota_stepsize: " , max_rota_stepsize);
				printf("%20s %10.6f\n", "max_tors_stepsize: " , max_tors_stepsize);

				printf("\n");
				printf("%20s %10.6f\n", "max_stepsize: ", max_stepsize);
				printf("%20s %10.6f\n", "stepsize: ", stepsize);
			}
lvs's avatar
lvs committed
619
			#endif
620
	   	}
621
		
622
		// Evaluating candidate
623
		barrier(CLK_LOCAL_MEM_FENCE);
624

Leonardo Solis's avatar
Leonardo Solis committed
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
		// =============================================================
		gpu_calc_energy(dockpars_rotbondlist_length,
				dockpars_num_of_atoms,
				dockpars_gridsize_x,
				dockpars_gridsize_y,
				dockpars_gridsize_z,
								    	// g1 = gridsize_x
				dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
				dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
				dockpars_fgrids,
				dockpars_num_of_atypes,
				dockpars_num_of_intraE_contributors,
				dockpars_grid_spacing,
				dockpars_coeff_elec,
				dockpars_qasp,
				dockpars_coeff_desolv,
lvs's avatar
lvs committed
641
				candidate_genotype, /*genotype,*/ /*WARNING: use "genotype" ONLY to reproduce results*/
Leonardo Solis's avatar
Leonardo Solis committed
642
643
644
645
646
647
648
649
650
651
				&candidate_energy,
				&run_id,
				// Some OpenCL compilers don't allow declaring 
				// local variables within non-kernel functions.
				// These local variables must be declared in a kernel, 
				// and then passed to non-kernel functions.
				calc_coords_x,
				calc_coords_y,
				calc_coords_z,
				partial_energies,
lvs's avatar
lvs committed
652
				#if defined (DEBUG_ENERGY_KERNEL)
Leonardo Solis's avatar
Leonardo Solis committed
653
654
655
				partial_interE,
				partial_intraE,
				#endif
656

Leonardo Solis's avatar
Leonardo Solis committed
657
658
659
				atom_charges_const,
				atom_types_const,
				intraE_contributors_const,
lvs's avatar
lvs committed
660
661
662
#if 0
				true,
#endif
lvs's avatar
lvs committed
663
664
665
				dockpars_smooth,
				reqm,
				reqm_hbond,
lvs's avatar
lvs committed
666
667
		     	        atom1_types_reqm,
		     	        atom2_types_reqm,
Leonardo Solis's avatar
Leonardo Solis committed
668
669
670
671
672
673
674
675
676
677
678
679
680
				VWpars_AC_const,
				VWpars_BD_const,
				dspars_S_const,
				dspars_V_const,
				rotlist_const,
				ref_coords_x_const,
				ref_coords_y_const,
				ref_coords_z_const,
				rotbonds_moving_vectors_const,
				rotbonds_unit_vectors_const,
				ref_orientation_quats_const
				);
		// =============================================================
681

682
		#if defined (DEBUG_ENERGY_KERNEL5)
lvs's avatar
lvs committed
683
		if (/*(get_group_id(0) == 0) &&*/ (get_local_id(0) == 0)) {
684
			#if defined (PRINT_ENERGIES)
685
686
687
688
			printf("\n");
			printf("%-10s %-10.6f \n", "intra: ",  partial_intraE[0]);
			printf("%-10s %-10.6f \n", "grids: ",  partial_interE[0]);
			printf("%-10s %-10.6f \n", "Energy: ", (partial_intraE[0] + partial_interE[0]));
689
			#endif
690

691
			#if defined (PRINT_GENES_AND_GRADS)
lvs's avatar
lvs committed
692
			for(uint i = 0; i < dockpars_num_of_genes; i++) {
693
694
				if (i == 0) {
					printf("\n%s\n", "----------------------------------------------------------");
lvs's avatar
lvs committed
695
					printf("%13s %13s %5s %15s %15s\n", "gene_id", "cand-gene.value"/* "gene.value"*/, "|", "gene.grad", "(autodockdevpy units)");
696
				}
lvs's avatar
lvs committed
697
				printf("%13u %13.6f %5s %15.6f %15.6f\n", i, candidate_genotype[i] /*genotype[i]*/, "|", gradient[i], (i<3)? (gradient[i]/0.375f):(gradient[i]*180.0f/PI_FLOAT));
698
			}
699
			#endif
700

701
			#if defined (PRINT_ATOMIC_COORDS)
702
703
704
			for(uint i = 0; i < dockpars_num_of_atoms; i++) {
				if (i == 0) {
					printf("\n%s\n", "----------------------------------------------------------");
705
					printf("%s\n", "Coordinates calculated by calcenergy.cl");
lvs's avatar
lvs committed
706
					printf("%12s %12s %12s %12s\n", "atom_id", "coords.x", "coords.y", "coords.z");
707
				}
lvs's avatar
lvs committed
708
				printf("%12u %12.6f %12.6f %12.6f\n", i, calc_coords_x[i], calc_coords_y[i], calc_coords_z[i]);
lvs's avatar
lvs committed
709
			}
710
			printf("\n");
711
			#endif
lvs's avatar
lvs committed
712
		}
713
		barrier(CLK_LOCAL_MEM_FENCE);
lvs's avatar
lvs committed
714
715
		#endif

lvs's avatar
lvs committed
716
717
718
		#if defined (DEBUG_MINIMIZER)
		if (get_local_id(0) == 0) {
			printf("\n");
719
720
721
			printf("%s\n", "After calculating energies:");
			printf("%13s %5s %15s\n", "energy", "|", "cand.energy");
			printf("%13.6f %5s %15.6f\n", energy, "|", candidate_energy);
lvs's avatar
lvs committed
722
723
724
725
726
			printf("\n");
		}
		barrier(CLK_LOCAL_MEM_FENCE);
		#endif

Leonardo Solis's avatar
Leonardo Solis committed
727
728
		// Checking if E(candidate_genotype) < E(genotype)
		if (candidate_energy < energy){
729
			
Leonardo Solis's avatar
Leonardo Solis committed
730
731
732
			for(uint i = get_local_id(0); 
			 	 i < dockpars_num_of_genes; 
		 	 	 i+= NUM_OF_THREADS_PER_BLOCK) {
lvs's avatar
lvs committed
733
734
735
736
737
738


				#if defined (DEBUG_MINIMIZER)
				//printf("(%-3u) %-15.7f %-10.7f %-10.7f %-10.7f\n", i, stepsize, genotype[i], gradient[i], candidate_genotype[i]);

				if (i == 0) {
739
					printf("%s\n", "Energy IMPROVED! ... then update genotype:");
lvs's avatar
lvs committed
740
741
742
743
					printf("%13s %13s %5s %15s\n", "gene_id", "old.gene", "|", "new.gene");
				}
				printf("%13u %13.6f %5s %15.6f\n", i, genotype[i], "|", candidate_genotype[i]);

744
				#endif
lvs's avatar
lvs committed
745
746
				if (i == 0) {
					#if defined (DEBUG_MINIMIZER)
747
					printf("\n%s\n", "Energy IMPROVED! ... then increase stepsize and update energy:");
lvs's avatar
lvs committed
748
749
					#endif

Leonardo Solis's avatar
Leonardo Solis committed
750
751
					// Increase stepsize
					stepsize *= STEP_INCREASE;
752
753
754

					// Updating energy
					energy = candidate_energy;
755
				}
Leonardo Solis's avatar
Leonardo Solis committed
756

Leonardo Solis's avatar
Leonardo Solis committed
757
758
				// Updating genotype
				genotype[i] = candidate_genotype[i];
759
			}
Leonardo Solis's avatar
Leonardo Solis committed
760
761
		}
		else { 
lvs's avatar
lvs committed
762
763
			#if defined (DEBUG_MINIMIZER)
			if (get_local_id(0) == 0) {
764
				printf("%s\n", "NO energy improvement! ... then decrease stepsize:");
lvs's avatar
lvs committed
765
766
767
			}
			#endif		

Leonardo Solis's avatar
Leonardo Solis committed
768
769
770
771
			if (get_local_id(0) == 0) {
				stepsize *= STEP_DECREASE;
			}
		}
772
		barrier(CLK_LOCAL_MEM_FENCE);
773

Leonardo Solis's avatar
Leonardo Solis committed
774
		// Updating number of stepest-descent iterations (energy evaluations)
775
		if (get_local_id(0) == 0) {
Leonardo Solis's avatar
Leonardo Solis committed
776
	    		iteration_cnt = iteration_cnt + 1;
777

lvs's avatar
lvs committed
778
			#if defined (DEBUG_MINIMIZER) || defined (PRINT_MINIMIZER_ENERGY_EVOLUTION)
779
			printf("%20s %10.10f %20s %10.6f\n", "new.stepsize: ", stepsize, "new.energy: ", energy);
780
			#endif
lvs's avatar
lvs committed
781

782
			#if defined (DEBUG_ENERGY_KERNEL5)
Leonardo Solis's avatar
Leonardo Solis committed
783
			printf("%-18s [%-5s]---{%-5s}   [%-10.7f]---{%-10.7f}\n", "-ENERGY-KERNEL5-", "GRIDS", "INTRA", partial_interE[0], partial_intraE[0]);
784
785
			#endif
		}
lvs's avatar
lvs committed
786
		barrier(CLK_LOCAL_MEM_FENCE);
787

lvs's avatar
lvs committed
788
  	} while ((iteration_cnt < dockpars_max_num_of_iters) && (stepsize > 1E-8));
789
790
	// -----------------------------------------------------------------------------
	// -----------------------------------------------------------------------------
791
	// -----------------------------------------------------------------------------
792

793
  	// Updating eval counter and energy
794
	if (get_local_id(0) == 0) {
Leonardo Solis's avatar
Leonardo Solis committed
795
		dockpars_evals_of_new_entities[run_id*dockpars_pop_size+entity_id] += iteration_cnt;
Leonardo Solis's avatar
Leonardo Solis committed
796
		dockpars_energies_next[run_id*dockpars_pop_size+entity_id] = energy;
797

lvs's avatar
lvs committed
798
		#if defined (DEBUG_MINIMIZER) || defined (PRINT_MINIMIZER_ENERGY_EVOLUTION)
799
800
801
		printf("\n");
		printf("Termination criteria: ( stepsize <= %10.10f ) OR ( #sd-iters >= %-3u )\n", 1E-8, dockpars_max_num_of_iters);
		printf("-------> End of SD minimization cycle, num of energy evals: %u, final energy: %.6f\n", iteration_cnt, energy);
802
		#endif
803
	}
804

805
806
807
808
	// Mapping torsion angles
	for (uint gene_counter = get_local_id(0);
	     	  gene_counter < dockpars_num_of_genes;
	          gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
809
		   if (gene_counter >= 3) {
Leonardo Solis's avatar
Leonardo Solis committed
810
			    map_angle(&(genotype[gene_counter]));
811
		   }
812
813
	}

814
	// Updating old offspring in population
815
816
817
	barrier(CLK_LOCAL_MEM_FENCE);

	async_work_group_copy(dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
Leonardo Solis's avatar
Leonardo Solis committed
818
			      genotype,
819
			      dockpars_num_of_genes, 0);
820
}