kernel_gradient.cl 29.4 KB
Newer Older
1
2
// Gradient-based steepest descent minimizer
// Alternative to Solis-Wetts
3

lvs's avatar
lvs committed
4
5
6
7
8
9
10
//#define DEBUG_ENERGY_KERNEL5
	//#define PRINT_ENERGIES
	//#define PRINT_GENES_AND_GRADS
	//#define PRINT_ATOMIC_COORDS

#define DEBUG_MINIMIZER
	#define PRINT_MINIMIZER_ENERGY_EVOLUTION
11

12
13
#define DEBUG_INITIAL_2BRT

14
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
15
16
gradient_minimizer(	
			char   dockpars_num_of_atoms,
17
18
19
20
21
			char   dockpars_num_of_atypes,
			int    dockpars_num_of_intraE_contributors,
			char   dockpars_gridsize_x,
			char   dockpars_gridsize_y,
			char   dockpars_gridsize_z,
22
23
24
							    		// g1 = gridsize_x
			uint   dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
			uint   dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
25
			float  dockpars_grid_spacing,
26
	 __global const float* restrict dockpars_fgrids, // This is too large to be allocated in __constant 
27
28
29
			int    dockpars_rotbondlist_length,
			float  dockpars_coeff_elec,
			float  dockpars_coeff_desolv,
30
31
	  __global      float* restrict dockpars_conformations_next,
	  __global      float* restrict dockpars_energies_next,
32
  	  __global 	int*   restrict dockpars_evals_of_new_entities,
33
	  __global      uint*  restrict dockpars_prng_states,
34
35
36
			int    dockpars_pop_size,
			int    dockpars_num_of_genes,
			float  dockpars_lsearch_rate,
37
			uint   dockpars_num_of_lsentities,
lvs's avatar
lvs committed
38
			uint   dockpars_max_num_of_iters,
39
			float  dockpars_qasp,
40
41
42
	     __constant float* atom_charges_const,
    	     __constant char*  atom_types_const,
	     __constant char*  intraE_contributors_const,
lvs's avatar
lvs committed
43
44
45
                        float  dockpars_smooth,
 	     __constant float* reqm,
	     __constant float* reqm_hbond,
lvs's avatar
lvs committed
46
47
	     __constant uint*  atom1_types_reqm,
	     __constant uint*  atom2_types_reqm,
48
49
50
51
52
53
54
55
56
57
58
    	     __constant float* VWpars_AC_const,
    	     __constant float* VWpars_BD_const,
             __constant float* dspars_S_const,
             __constant float* dspars_V_const,
             __constant int*   rotlist_const,
    	     __constant float* ref_coords_x_const,
    	     __constant float* ref_coords_y_const,
             __constant float* ref_coords_z_const,
    	     __constant float* rotbonds_moving_vectors_const,
             __constant float* rotbonds_unit_vectors_const,
             __constant float* ref_orientation_quats_const,
59
60
	     __constant int*   rotbonds_const,
	     __constant int*   rotbonds_atoms_const,
lvs's avatar
lvs committed
61
	     __constant int*   num_rotating_atoms_per_rotbond_const
62
63
64
65
			,
	     __constant float* angle_const,
	     __constant float* dependence_on_theta_const,
	     __constant float* dependence_on_rotangle_const
66
67
68
69
70
71
72
73
74
)
//The GPU global function performs gradient-based minimization on (some) entities of conformations_next.
//The number of OpenCL compute units (CU) which should be started equals to num_of_minEntities*num_of_runs.
//This way the first num_of_lsentities entity of each population will be subjected to local search
//(and each CU carries out the algorithm for one entity).
//Since the first entity is always the best one in the current population,
//it is always tested according to the ls probability, and if it not to be
//subjected to local search, the entity with ID num_of_lsentities is selected instead of the first one (with ID 0).
{
75
76
	// -----------------------------------------------------------------------------
	// Determining entity, and its run, energy, and genotype
77
78
	__local int   entity_id;
	__local int   run_id;
Leonardo Solis's avatar
Leonardo Solis committed
79
80
81
  	__local float energy;
	__local float genotype[ACTUAL_GENOTYPE_LENGTH];

lvs's avatar
lvs committed
82
83
	// Iteration counter fot the minimizer
  	__local uint iteration_cnt;  	
84

Leonardo Solis's avatar
Leonardo Solis committed
85
86
87
	// Stepsize for the minimizer
	__local float stepsize;

88
89
	if (get_local_id(0) == 0)
	{
90
		// Choosing a random entity out of the entire population
lvs's avatar
lvs committed
91
		/*
92
		run_id = get_group_id(0);
Leonardo Solis's avatar
Leonardo Solis committed
93
94
		//entity_id = (uint)(dockpars_pop_size * gpu_randf(dockpars_prng_states));
		entity_id = 0;
lvs's avatar
lvs committed
95
		*/
96

97
98
		run_id = get_group_id(0) / dockpars_num_of_lsentities;
		entity_id = get_group_id(0) % dockpars_num_of_lsentities;
99
100
101
102
103
104
105
106
107
108

		// Since entity 0 is the best one due to elitism,
		// it should be subjected to random selection
		if (entity_id == 0) {
			// If entity 0 is not selected according to LS-rate,
			// choosing an other entity
			if (100.0f*gpu_randf(dockpars_prng_states) > dockpars_lsearch_rate) {
				entity_id = dockpars_num_of_lsentities;					
			}
		}
109
		
Leonardo Solis's avatar
Leonardo Solis committed
110
		energy = dockpars_energies_next[run_id*dockpars_pop_size+entity_id];
111

lvs's avatar
lvs committed
112
113
		#if defined (DEBUG_MINIMIZER) || defined (PRINT_MINIMIZER_ENERGY_EVOLUTION)
		printf("\nrun_id:  %5u entity_id: %5u  initial_energy: %.6f\n", run_id, entity_id, energy);
lvs's avatar
lvs committed
114
		#endif
Leonardo Solis's avatar
Leonardo Solis committed
115
116
117

		// Initializing gradient-minimizer counters and flags
    		iteration_cnt  = 0;
Leonardo Solis's avatar
Leonardo Solis committed
118
		stepsize       = STEP_START;
119
120
121
122

		#if defined (DEBUG_MINIMIZER) || defined (PRINT_MINIMIZER_ENERGY_EVOLUTION)
		printf("\ninitial stepsize: %.6f", stepsize);
		#endif
123
124
	}

125
126
	barrier(CLK_LOCAL_MEM_FENCE);

lvs's avatar
lvs committed
127
128
129
  	event_t ev = async_work_group_copy(genotype,
  			      		   dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
                              		   dockpars_num_of_genes, 0);
130

131
132
133
	// Asynchronous copy should be finished by here
	wait_group_events(1,&ev);

134
  	// -----------------------------------------------------------------------------
Leonardo Solis's avatar
Leonardo Solis committed
135
	           
136
	// Partial results of the gradient step
Leonardo Solis's avatar
Leonardo Solis committed
137
138
139
	__local float gradient          [ACTUAL_GENOTYPE_LENGTH];
	__local float candidate_energy;
	__local float candidate_genotype[ACTUAL_GENOTYPE_LENGTH];
140

lvs's avatar
lvs committed
141
142
143
144
	// Dummy variable used only for the first gpu_calc_gradient() call.
	// The corresponding energy for "genotype" is stored in "energy"
	__local float dummy_energy; 	

145
146
147
148
	// -------------------------------------------------------------------
	// Calculate gradients (forces) for intermolecular energy
	// Derived from autodockdev/maps.py
	// -------------------------------------------------------------------
149
	// Gradient of the intermolecular energy per each ligand atom
lvs's avatar
lvs committed
150
	// Also used to store the accummulated gradient per each ligand atom
151
152
153
	__local float gradient_inter_x[MAX_NUM_OF_ATOMS];
	__local float gradient_inter_y[MAX_NUM_OF_ATOMS];
	__local float gradient_inter_z[MAX_NUM_OF_ATOMS];
154

155
	// Gradient of the intramolecular energy per each ligand atom
156
157
158
	__local float gradient_intra_x[MAX_NUM_OF_ATOMS];
	__local float gradient_intra_y[MAX_NUM_OF_ATOMS];
	__local float gradient_intra_z[MAX_NUM_OF_ATOMS];
159

160
	// Ligand-atom position and partial energies
161
162
163
164
165
	__local float calc_coords_x[MAX_NUM_OF_ATOMS];
	__local float calc_coords_y[MAX_NUM_OF_ATOMS];
	__local float calc_coords_z[MAX_NUM_OF_ATOMS];
	__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];

lvs's avatar
lvs committed
166
	#if defined (DEBUG_ENERGY_KERNEL)
167
168
	__local float partial_interE[NUM_OF_THREADS_PER_BLOCK];
	__local float partial_intraE[NUM_OF_THREADS_PER_BLOCK];
169
	#endif
170

171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
	// Enable this for start debugging from a defined genotype
	#if defined (DEBUG_INITIAL_2BRT)
	// 2brt
	genotype[0] = 24.093334;
	genotype[1] = 24.658667;
	genotype[2] = 24.210667;
	genotype[3] = 50.0;
	genotype[4] = 50.0;
	genotype[5] = 50.0;
	genotype[6] = 0.0f;
	genotype[7] = 0.0f;
	genotype[8] = 0.0f;
	genotype[9] = 0.0f;
	genotype[10] = 0.0f;
	genotype[11] = 0.0f;
	genotype[12] = 0.0f;
	genotype[13] = 0.0f;
	genotype[14] = 0.0f;
	genotype[15] = 0.0f;
	genotype[16] = 0.0f;
	genotype[17] = 0.0f;
	genotype[18] = 0.0f;
	genotype[19] = 0.0f;
	genotype[20] = 0.0f;
	#endif

lvs's avatar
lvs committed
197
	// -----------------------------------------------------------------------------
lvs's avatar
lvs committed
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
	// Calculating gradient
	barrier(CLK_LOCAL_MEM_FENCE);

	// =============================================================
	gpu_calc_gradient(
			dockpars_rotbondlist_length,
			dockpars_num_of_atoms,
			dockpars_gridsize_x,
			dockpars_gridsize_y,
			dockpars_gridsize_z,
								// g1 = gridsize_x
			dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
			dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
			dockpars_fgrids,
			dockpars_num_of_atypes,
			dockpars_num_of_intraE_contributors,
			dockpars_grid_spacing,
			dockpars_coeff_elec,
			dockpars_qasp,
			dockpars_coeff_desolv,
			// Some OpenCL compilers don't allow declaring 
			// local variables within non-kernel functions.
			// These local variables must be declared in a kernel, 
			// and then passed to non-kernel functions.
			genotype,
			&dummy_energy,
			&run_id,

			calc_coords_x,
			calc_coords_y,
			calc_coords_z,

			atom_charges_const,
			atom_types_const,
			intraE_contributors_const,
			dockpars_smooth,
			reqm,
			reqm_hbond,
		     	atom1_types_reqm,
		     	atom2_types_reqm,
			VWpars_AC_const,
			VWpars_BD_const,
			dspars_S_const,
			dspars_V_const,
			rotlist_const,
			ref_coords_x_const,
			ref_coords_y_const,
			ref_coords_z_const,
			rotbonds_moving_vectors_const,
			rotbonds_unit_vectors_const,
			ref_orientation_quats_const,
			rotbonds_const,
			rotbonds_atoms_const,
			num_rotating_atoms_per_rotbond_const
			,
	     		angle_const,
	     		dependence_on_theta_const,
	     		dependence_on_rotangle_const
			// Gradient-related arguments
			// Calculate gradients (forces) for intermolecular energy
			// Derived from autodockdev/maps.py
			,
			dockpars_num_of_genes,
			gradient_inter_x,
			gradient_inter_y,
			gradient_inter_z,
			gradient_intra_x,
			gradient_intra_y,
			gradient_intra_z,
			gradient
			);
	// =============================================================

271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
	#if defined (DEBUG_INITIAL_2BRT)

	// Evaluating candidate
	barrier(CLK_LOCAL_MEM_FENCE);

	// =============================================================
	gpu_calc_energy(dockpars_rotbondlist_length,
			dockpars_num_of_atoms,
			dockpars_gridsize_x,
			dockpars_gridsize_y,
			dockpars_gridsize_z,
							    	// g1 = gridsize_x
			dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
			dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
			dockpars_fgrids,
			dockpars_num_of_atypes,
			dockpars_num_of_intraE_contributors,
			dockpars_grid_spacing,
			dockpars_coeff_elec,
			dockpars_qasp,
			dockpars_coeff_desolv,
			/*candidate_genotype,*/ genotype, /*WARNING: here "genotype" is used to calculate the energy of the manually specified genotype*/
			&energy,
			&run_id,
			// Some OpenCL compilers don't allow declaring 
			// local variables within non-kernel functions.
			// These local variables must be declared in a kernel, 
			// and then passed to non-kernel functions.
			calc_coords_x,
			calc_coords_y,
			calc_coords_z,
			partial_energies,
			#if defined (DEBUG_ENERGY_KERNEL)
			partial_interE,
			partial_intraE,
			#endif

			atom_charges_const,
			atom_types_const,
			intraE_contributors_const,
#if 0
			true,
#endif
			dockpars_smooth,
			reqm,
			reqm_hbond,
	     	        atom1_types_reqm,
	     	        atom2_types_reqm,
			VWpars_AC_const,
			VWpars_BD_const,
			dspars_S_const,
			dspars_V_const,
			rotlist_const,
			ref_coords_x_const,
			ref_coords_y_const,
			ref_coords_z_const,
			rotbonds_moving_vectors_const,
			rotbonds_unit_vectors_const,
			ref_orientation_quats_const
			);
	// =============================================================

	if (get_local_id(0) == 0)
	{
		printf("\ninitial_energy: %.6f\n", energy);
		printf("\ninitial stepsize: %.6f", stepsize);
	}
	barrier(CLK_LOCAL_MEM_FENCE);
	#endif

341
	// Perform gradient-descent iterations
342

343
	#if 0
Leonardo Solis's avatar
Leonardo Solis committed
344
345
346
347
348
349
350
351
352
353
	// 7cpa
	float grid_center_x = 49.836;
	float grid_center_y = 17.609;
	float grid_center_z = 36.272;
	float ligand_center_x = 49.2216976744186;
	float ligand_center_y = 17.793953488372097;
	float ligand_center_z = 36.503837209302326;
	float shoemake_gene_u1 = 0.02;
	float shoemake_gene_u2 = 0.23;
	float shoemake_gene_u3 = 0.95;
354
	#endif
Leonardo Solis's avatar
Leonardo Solis committed
355

356
357
358
359
360
361
362
363
364
	#if 0
	// 3tmn
	float grid_center_x = 52.340;
	float grid_center_y = 15.029;
	float grid_center_z = -2.932;
	float ligand_center_x = 52.22740741;
	float ligand_center_y = 15.51751852;
	float ligand_center_z = -2.40896296;
	#endif
365

lvs's avatar
lvs committed
366
367
	// No need for defining upper and lower genotype bounds
	#if 0
Leonardo Solis's avatar
Leonardo Solis committed
368
369
370
371
372
373
374
375
376
377
378
	// Defining lower and upper bounds for genotypes
	__local float lower_bounds_genotype[ACTUAL_GENOTYPE_LENGTH];
	__local float upper_bounds_genotype[ACTUAL_GENOTYPE_LENGTH];

	for (uint gene_counter = get_local_id(0);
	          gene_counter < dockpars_num_of_genes;
	          gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
		// Translation genes ranges are within the gridbox
		if (gene_counter <= 2) {
			lower_bounds_genotype [gene_counter] = 0.0f;
			upper_bounds_genotype [gene_counter] = (gene_counter == 0) ? dockpars_gridsize_x: 
379
380
381
382
383
							       (gene_counter == 1) ? dockpars_gridsize_y: 
										     dockpars_gridsize_z;
		// Orientation and torsion genes range between [0, 360]
		// See auxiliary_genetic.cl/map_angle()
		} else {
Leonardo Solis's avatar
Leonardo Solis committed
384
385
386
			lower_bounds_genotype [gene_counter] = 0.0f;
			upper_bounds_genotype [gene_counter] = 360.0f;
		}
Leonardo Solis's avatar
Leonardo Solis committed
387
388
389
390

		#if defined (DEBUG_MINIMIZER)
		//printf("(%-3u) %-0.7f %-10.7f %-10.7f %-10.7f\n", gene_counter, stepsize, genotype[gene_counter], lower_bounds_genotype[gene_counter], upper_bounds_genotype[gene_counter]);
		#endif
Leonardo Solis's avatar
Leonardo Solis committed
391
392
	}
	barrier(CLK_LOCAL_MEM_FENCE);
lvs's avatar
lvs committed
393
	#endif
Leonardo Solis's avatar
Leonardo Solis committed
394

lvs's avatar
lvs committed
395
	// Calculating maximum possible stepsize (alpha)
lvs's avatar
lvs committed
396
	__local float max_trans_grad, max_rota_grad, max_tors_grad;
lvs's avatar
lvs committed
397
398
399
	__local float max_trans_stepsize, max_rota_stepsize, max_tors_stepsize;
	__local float max_stepsize;

lvs's avatar
lvs committed
400
401
	// Storing torsion gradients here
	__local float torsions_gradient[ACTUAL_GENOTYPE_LENGTH];
lvs's avatar
lvs committed
402

Leonardo Solis's avatar
Leonardo Solis committed
403
404
405
	// The termination criteria is based on 
	// a maximum number of iterations, and
	// the minimum step size allowed for single-floating point numbers 
lvs's avatar
lvs committed
406
	// (IEEE-754 single float has a precision of about 6 decimal digits)
407
	do {
408
		#if 0
409
		// Specific input genotypes for a ligand with no rotatable bonds (1ac8).
Leonardo Solis's avatar
Leonardo Solis committed
410
		// Translation genes must be expressed in grids in OCLADock (genotype [0|1|2]).
411
412
		// However, for testing purposes, 
		// we start using translation values in real space (Angstrom): {31.79575, 93.743875, 47.699875}
Leonardo Solis's avatar
Leonardo Solis committed
413
		// Rotation genes are expresed in the Shoemake space: genotype [3|4|5]
414
		// xyz_gene_gridspace = gridcenter_gridspace + (input_gene_realspace - gridcenter_realspace)/gridsize
415
416

		// 1ac8				
Leonardo Solis's avatar
Leonardo Solis committed
417
418
419
420
421
422
		genotype[0] = 30 + (31.79575  - 31.924) / 0.375;
		genotype[1] = 30 + (93.743875 - 93.444) / 0.375;
		genotype[2] = 30 + (47.699875 - 47.924) / 0.375;
		genotype[3] = 0.1f;
		genotype[4] = 0.5f;
		genotype[5] = 0.9f;
423
		#endif
424

425
		#if 0
426
		// 3tmn
Leonardo Solis's avatar
Leonardo Solis committed
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
		genotype[0] = 30 + (ligand_center_x - grid_center_x) / 0.375;
		genotype[1] = 30 + (ligand_center_y - grid_center_y) / 0.375;
		genotype[2] = 30 + (ligand_center_z - grid_center_z) / 0.375;
		genotype[3] = shoemake_gene_u1;
		genotype[4] = shoemake_gene_u2;
		genotype[5] = shoemake_gene_u3;
		genotype[6] = 0.0f;
		genotype[7] = 0.0f;
		genotype[8] = 0.0f;
		genotype[9] = 0.0f;
		genotype[10] = 0.0f;
		genotype[11] = 0.0f;
		genotype[12] = 0.0f;
		genotype[13] = 0.0f;
		genotype[14] = 0.0f;
		genotype[15] = 0.0f;
		genotype[16] = 0.0f;
		genotype[17] = 0.0f;
		genotype[18] = 0.0f;
		genotype[19] = 0.0f;
		genotype[20] = 0.0f;
448
		#endif
449

450
451
		#if 0
		// 2j5s
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
		genotype[0] = 28.464;
		genotype[1] = 25.792762;
		genotype[2] = 23.740571;
		genotype[3] = 50.0;
		genotype[4] = 50.0;
		genotype[5] = 50.0;
		genotype[6] = 0.0f;
		genotype[7] = 0.0f;
		genotype[8] = 0.0f;
		genotype[9] = 0.0f;
		genotype[10] = 0.0f;
		genotype[11] = 0.0f;
		genotype[12] = 0.0f;
		genotype[13] = 0.0f;
		genotype[14] = 0.0f;
		genotype[15] = 0.0f;
		genotype[16] = 0.0f;
		genotype[17] = 0.0f;
		genotype[18] = 0.0f;
		genotype[19] = 0.0f;
		genotype[20] = 0.0f;
473
474
		#endif

lvs's avatar
lvs committed
475
		#if 0
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
		// 2brt
		genotype[0] = 24.093334;
		genotype[1] = 24.658667;
		genotype[2] = 24.210667;
		genotype[3] = 50.0;
		genotype[4] = 50.0;
		genotype[5] = 50.0;
		genotype[6] = 0.0f;
		genotype[7] = 0.0f;
		genotype[8] = 0.0f;
		genotype[9] = 0.0f;
		genotype[10] = 0.0f;
		genotype[11] = 0.0f;
		genotype[12] = 0.0f;
		genotype[13] = 0.0f;
		genotype[14] = 0.0f;
		genotype[15] = 0.0f;
		genotype[16] = 0.0f;
		genotype[17] = 0.0f;
		genotype[18] = 0.0f;
		genotype[19] = 0.0f;
		genotype[20] = 0.0f;
		#endif
499

Leonardo Solis's avatar
Leonardo Solis committed
500
		if (get_local_id(0) == 0) {
lvs's avatar
lvs committed
501
502
503
			// Finding maximum of the absolute value for the three translation gradients
			max_trans_grad = fmax(fabs(gradient[0]), fabs(gradient[1]));
			max_trans_grad = fmax(max_trans_grad, fabs(gradient[2]));
Leonardo Solis's avatar
Leonardo Solis committed
504

lvs's avatar
lvs committed
505
506
			// MAX_DEV_TRANSLATION needs to be expressed in grid size first
			max_trans_stepsize = native_divide(native_divide(MAX_DEV_TRANSLATION, dockpars_grid_spacing), max_trans_grad);
Leonardo Solis's avatar
Leonardo Solis committed
507

lvs's avatar
lvs committed
508
			// Finding maximum of the absolute value for the three rotation gradients
lvs's avatar
lvs committed
509
510
			max_rota_grad = fmax(fabs(gradient[3]), fabs(gradient[4]));	
			max_rota_grad = fmax(max_rota_grad, fabs(gradient[5]));	
Leonardo Solis's avatar
Leonardo Solis committed
511
512
513

			// Note that MAX_DEV_ROTATION
			// is already expressed within [0, 1]
lvs's avatar
lvs committed
514
			max_rota_stepsize = native_divide(MAX_DEV_ROTATION, max_rota_grad);
Leonardo Solis's avatar
Leonardo Solis committed
515
516
517
518
519
520
		}

		// Copying torsions genes
		for(uint i = get_local_id(0); 
			 i < dockpars_num_of_genes-6; 
			 i+= NUM_OF_THREADS_PER_BLOCK) {
lvs's avatar
lvs committed
521
			torsions_gradient[i] = fabs(gradient[i+6]);
Leonardo Solis's avatar
Leonardo Solis committed
522
523
524
525
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		// Calculating maximum absolute torsional gene
lvs's avatar
lvs committed
526
527
		// https://stackoverflow.com/questions/36465581/opencl-find-max-in-array
		for (uint i=(dockpars_num_of_genes-6)/2; i>=1; i/=2){
Leonardo Solis's avatar
Leonardo Solis committed
528
			if (get_local_id(0) < i) {
Leonardo Solis's avatar
Leonardo Solis committed
529

lvs's avatar
lvs committed
530
			#if 0
Leonardo Solis's avatar
Leonardo Solis committed
531
			#if defined (DEBUG_MINIMIZER)
lvs's avatar
lvs committed
532
533
			printf("---====--- %u %u %10.10f %-0.10f\n", i, get_local_id(0), torsions_gradient[get_local_id(0)], torsions_gradient[get_local_id(0) + i]);
			#endif
Leonardo Solis's avatar
Leonardo Solis committed
534
535
			#endif

lvs's avatar
lvs committed
536
537
				if (torsions_gradient[get_local_id(0)] < torsions_gradient[get_local_id(0) + i]) {
					torsions_gradient[get_local_id(0)] = torsions_gradient[get_local_id(0) + i];
Leonardo Solis's avatar
Leonardo Solis committed
538
539
540
541
542
				}
			}
			barrier(CLK_LOCAL_MEM_FENCE);
		}
		if (get_local_id(0) == 0) {
lvs's avatar
lvs committed
543
			max_tors_grad = torsions_gradient[get_local_id(0)];
lvs's avatar
lvs committed
544
			max_tors_stepsize = native_divide(MAX_DEV_TORSION, max_tors_grad);
Leonardo Solis's avatar
Leonardo Solis committed
545
546
547
548
549
		}

		barrier(CLK_LOCAL_MEM_FENCE);

		if (get_local_id(0) == 0) {
Leonardo Solis's avatar
Leonardo Solis committed
550
			// Calculating the maximum stepsize using previous three
lvs's avatar
lvs committed
551
552
			max_stepsize = fmin(max_trans_stepsize, max_rota_stepsize);
			max_stepsize = fmin(max_stepsize, max_tors_stepsize);
Leonardo Solis's avatar
Leonardo Solis committed
553

Leonardo Solis's avatar
Leonardo Solis committed
554
			// Capping the stepsize
lvs's avatar
lvs committed
555
			stepsize = fmin(stepsize, max_stepsize);
Leonardo Solis's avatar
Leonardo Solis committed
556

lvs's avatar
lvs committed
557
			#if 1
Leonardo Solis's avatar
Leonardo Solis committed
558
			#if defined (DEBUG_MINIMIZER)
lvs's avatar
lvs committed
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
			for(uint i = 0; i < dockpars_num_of_genes; i++) {
				if (i == 0) {
					printf("\n%s\n", "----------------------------------------------------------");
					printf("\n%s\n", "Before calculating gradients:");
					printf("%13s %13s %5s %15s %20s\n", "gene_id", "gene", "|", "grad", " grad (devpy units)");
				}
				printf("%13u %13.6f %5s %15.6f %18.6f\n", i, genotype[i], "|", gradient[i], (i<3)? (gradient[i]/0.375f):(gradient[i]*180.0f/PI_FLOAT));
			}

			printf("\n");
			printf("%20s %10.6f\n", "max_trans_grad: ", max_trans_grad);
			printf("%20s %10.6f\n", "max_rota_grad: ", max_rota_grad);
			printf("%20s %10.6f\n", "max_tors_grad: ", max_tors_grad);

			printf("\n");
			printf("%20s %10.6f\n", "max_trans_stepsize: ", max_trans_stepsize);
			printf("%20s %10.6f\n", "max_rota_stepsize: " , max_rota_stepsize);
			printf("%20s %10.6f\n", "max_tors_stepsize: " , max_tors_stepsize);

			printf("\n");
579
			printf("%20s %10.6f\n\n", "max_stepsize: ", max_stepsize);
lvs's avatar
lvs committed
580
581
			printf("%20s %10.6f\n\n", "stepsize: ", stepsize);
			#endif
Leonardo Solis's avatar
Leonardo Solis committed
582
			#endif
Leonardo Solis's avatar
Leonardo Solis committed
583
584
		}	

585
		// Calculating gradient
Leonardo Solis's avatar
Leonardo Solis committed
586
587
		barrier(CLK_LOCAL_MEM_FENCE);

588
589
590
591
592
593
594
		// =============================================================
		gpu_calc_gradient(
				dockpars_rotbondlist_length,
				dockpars_num_of_atoms,
				dockpars_gridsize_x,
				dockpars_gridsize_y,
				dockpars_gridsize_z,
595
596
597
								    	// g1 = gridsize_x
				dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
				dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
598
599
600
601
602
603
604
605
606
607
608
				dockpars_fgrids,
				dockpars_num_of_atypes,
				dockpars_num_of_intraE_contributors,
				dockpars_grid_spacing,
				dockpars_coeff_elec,
				dockpars_qasp,
				dockpars_coeff_desolv,
				// Some OpenCL compilers don't allow declaring 
				// local variables within non-kernel functions.
				// These local variables must be declared in a kernel, 
				// and then passed to non-kernel functions.
Leonardo Solis's avatar
Leonardo Solis committed
609
610
				genotype,
				&energy,
Leonardo Solis's avatar
Leonardo Solis committed
611
612
				&run_id,

613
614
615
616
617
618
619
				calc_coords_x,
				calc_coords_y,
				calc_coords_z,

			        atom_charges_const,
				atom_types_const,
				intraE_contributors_const,
lvs's avatar
lvs committed
620
621
622
				dockpars_smooth,
				reqm,
				reqm_hbond,
lvs's avatar
lvs committed
623
624
		     	        atom1_types_reqm,
		     	        atom2_types_reqm,
625
626
627
628
629
630
631
632
633
634
				VWpars_AC_const,
				VWpars_BD_const,
				dspars_S_const,
				dspars_V_const,
				rotlist_const,
				ref_coords_x_const,
				ref_coords_y_const,
				ref_coords_z_const,
				rotbonds_moving_vectors_const,
				rotbonds_unit_vectors_const,
635
636
637
638
				ref_orientation_quats_const,
				rotbonds_const,
				rotbonds_atoms_const,
				num_rotating_atoms_per_rotbond_const
639
640
641
642
				,
	     			angle_const,
	     			dependence_on_theta_const,
	     			dependence_on_rotangle_const
643
644
645
646
647
648
649
650
			 	// Gradient-related arguments
			 	// Calculate gradients (forces) for intermolecular energy
			 	// Derived from autodockdev/maps.py
				,
				dockpars_num_of_genes,
				gradient_inter_x,
				gradient_inter_y,
				gradient_inter_z,
651
652
653
				gradient_intra_x,
				gradient_intra_y,
				gradient_intra_z,
Leonardo Solis's avatar
Leonardo Solis committed
654
				gradient
655
656
				);
		// =============================================================
657

658
659
660
		// This could be enabled back for double checking
		#if 0
		#if defined (DEBUG_ENERGY_KERNEL5)	
lvs's avatar
lvs committed
661
		if (/*(get_group_id(0) == 0) &&*/ (get_local_id(0) == 0)) {
662
663
		
			#if defined (PRINT_GENES_AND_GRADS)
lvs's avatar
lvs committed
664
			for(uint i = 0; i < dockpars_num_of_genes; i++) {
665
666
667
668
669
				if (i == 0) {
					printf("\n%s\n", "----------------------------------------------------------");
					printf("%13s %13s %5s %15s %15s\n", "gene_id", "gene.value", "|", "gene.grad", "(autodockdevpy units)");
				}
				printf("%13u %13.6f %5s %15.6f %15.6f\n", i, genotype[i], "|", gradient[i], (i<3)? (gradient[i]/0.375f):(gradient[i]*180.0f/PI_FLOAT));
lvs's avatar
lvs committed
670
			}
671
			#endif
672

673
			#if defined (PRINT_ATOMIC_COORDS)
674
675
676
			for(uint i = 0; i < dockpars_num_of_atoms; i++) {
				if (i == 0) {
					printf("\n%s\n", "----------------------------------------------------------");
677
					printf("%s\n", "Coordinates calculated by calcgradient.cl");
lvs's avatar
lvs committed
678
					printf("%12s %12s %12s %12s\n", "atom_id", "coords.x", "coords.y", "coords.z");
679
				}
lvs's avatar
lvs committed
680
				printf("%12u %12.6f %12.6f %12.6f\n", i, calc_coords_x[i], calc_coords_y[i], calc_coords_z[i]);
681
682
			}
			printf("\n");
683
			#endif
lvs's avatar
lvs committed
684
		}
685
686
		#endif
		#endif
687
688
		
		for(uint i = get_local_id(0); i < dockpars_num_of_genes; i+= NUM_OF_THREADS_PER_BLOCK) {
689
	     		// Taking step
Leonardo Solis's avatar
Leonardo Solis committed
690
			candidate_genotype[i] = genotype[i] - stepsize * gradient[i];	
691
692

			#if defined (DEBUG_MINIMIZER)
Leonardo Solis's avatar
Leonardo Solis committed
693
			//printf("(%-3u) %-0.7f %-10.7f %-10.7f %-10.7f\n", i, stepsize, genotype[i], gradient[i], candidate_genotype[i]);
lvs's avatar
lvs committed
694
695
696
697
698
699

			if (i == 0) {
				printf("\n%s\n", "After calculating gradients:");
				printf("%13s %13s %5s %15s %5s %20s\n", "gene_id", "gene", "|", "grad", "|", "cand_gene");
			}
			printf("%13u %13.6f %5s %15.6f %5s %18.6f\n", i, genotype[i], "|", gradient[i], "|", candidate_genotype[i]);
700
			#endif
701

lvs's avatar
lvs committed
702
703
			// No need for defining upper and lower genotype bounds
			#if 0
Leonardo Solis's avatar
Leonardo Solis committed
704
			// Putting genes back within bounds
lvs's avatar
lvs committed
705
706
			candidate_genotype[i] = fmin(candidate_genotype[i], upper_bounds_genotype[i]);
			candidate_genotype[i] = fmax(candidate_genotype[i], lower_bounds_genotype[i]);
lvs's avatar
lvs committed
707
			#endif
708
	   	}
709
		
710
		// Evaluating candidate
711
		barrier(CLK_LOCAL_MEM_FENCE);
712

Leonardo Solis's avatar
Leonardo Solis committed
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
		// =============================================================
		gpu_calc_energy(dockpars_rotbondlist_length,
				dockpars_num_of_atoms,
				dockpars_gridsize_x,
				dockpars_gridsize_y,
				dockpars_gridsize_z,
								    	// g1 = gridsize_x
				dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
				dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
				dockpars_fgrids,
				dockpars_num_of_atypes,
				dockpars_num_of_intraE_contributors,
				dockpars_grid_spacing,
				dockpars_coeff_elec,
				dockpars_qasp,
				dockpars_coeff_desolv,
lvs's avatar
lvs committed
729
				candidate_genotype, /*genotype,*/ /*WARNING: use "genotype" ONLY to reproduce results*/
Leonardo Solis's avatar
Leonardo Solis committed
730
731
732
733
734
735
736
737
738
739
				&candidate_energy,
				&run_id,
				// Some OpenCL compilers don't allow declaring 
				// local variables within non-kernel functions.
				// These local variables must be declared in a kernel, 
				// and then passed to non-kernel functions.
				calc_coords_x,
				calc_coords_y,
				calc_coords_z,
				partial_energies,
lvs's avatar
lvs committed
740
				#if defined (DEBUG_ENERGY_KERNEL)
Leonardo Solis's avatar
Leonardo Solis committed
741
742
743
				partial_interE,
				partial_intraE,
				#endif
744

Leonardo Solis's avatar
Leonardo Solis committed
745
746
747
				atom_charges_const,
				atom_types_const,
				intraE_contributors_const,
lvs's avatar
lvs committed
748
749
750
#if 0
				true,
#endif
lvs's avatar
lvs committed
751
752
753
				dockpars_smooth,
				reqm,
				reqm_hbond,
lvs's avatar
lvs committed
754
755
		     	        atom1_types_reqm,
		     	        atom2_types_reqm,
Leonardo Solis's avatar
Leonardo Solis committed
756
757
758
759
760
761
762
763
764
765
766
767
768
				VWpars_AC_const,
				VWpars_BD_const,
				dspars_S_const,
				dspars_V_const,
				rotlist_const,
				ref_coords_x_const,
				ref_coords_y_const,
				ref_coords_z_const,
				rotbonds_moving_vectors_const,
				rotbonds_unit_vectors_const,
				ref_orientation_quats_const
				);
		// =============================================================
769

770
		#if defined (DEBUG_ENERGY_KERNEL5)
lvs's avatar
lvs committed
771
		if (/*(get_group_id(0) == 0) &&*/ (get_local_id(0) == 0)) {
772
			#if defined (PRINT_ENERGIES)
773
774
775
776
			printf("\n");
			printf("%-10s %-10.6f \n", "intra: ",  partial_intraE[0]);
			printf("%-10s %-10.6f \n", "grids: ",  partial_interE[0]);
			printf("%-10s %-10.6f \n", "Energy: ", (partial_intraE[0] + partial_interE[0]));
777
			#endif
778

779
			#if defined (PRINT_GENES_AND_GRADS)
lvs's avatar
lvs committed
780
			for(uint i = 0; i < dockpars_num_of_genes; i++) {
781
782
				if (i == 0) {
					printf("\n%s\n", "----------------------------------------------------------");
lvs's avatar
lvs committed
783
					printf("%13s %13s %5s %15s %15s\n", "gene_id", "cand-gene.value"/* "gene.value"*/, "|", "gene.grad", "(autodockdevpy units)");
784
				}
lvs's avatar
lvs committed
785
				printf("%13u %13.6f %5s %15.6f %15.6f\n", i, candidate_genotype[i] /*genotype[i]*/, "|", gradient[i], (i<3)? (gradient[i]/0.375f):(gradient[i]*180.0f/PI_FLOAT));
786
			}
787
			#endif
788

789
			#if defined (PRINT_ATOMIC_COORDS)
790
791
792
			for(uint i = 0; i < dockpars_num_of_atoms; i++) {
				if (i == 0) {
					printf("\n%s\n", "----------------------------------------------------------");
793
					printf("%s\n", "Coordinates calculated by calcenergy.cl");
lvs's avatar
lvs committed
794
					printf("%12s %12s %12s %12s\n", "atom_id", "coords.x", "coords.y", "coords.z");
795
				}
lvs's avatar
lvs committed
796
				printf("%12u %12.6f %12.6f %12.6f\n", i, calc_coords_x[i], calc_coords_y[i], calc_coords_z[i]);
lvs's avatar
lvs committed
797
			}
798
			printf("\n");
799
			#endif
lvs's avatar
lvs committed
800
801
802
		}
		#endif

lvs's avatar
lvs committed
803
804
805
806
807
808
809
810
811
812
813
		#if defined (DEBUG_MINIMIZER)
		barrier(CLK_LOCAL_MEM_FENCE);
		if (get_local_id(0) == 0) {
			printf("\n");
			printf("%-20s %-13.6f\n", "Old energy: ", energy);
			printf("%-20s %-13.6f\n", "New energy: ", candidate_energy);
			printf("\n");
		}
		barrier(CLK_LOCAL_MEM_FENCE);
		#endif

Leonardo Solis's avatar
Leonardo Solis committed
814
815
		// Checking if E(candidate_genotype) < E(genotype)
		if (candidate_energy < energy){
816
			
Leonardo Solis's avatar
Leonardo Solis committed
817
818
819
			for(uint i = get_local_id(0); 
			 	 i < dockpars_num_of_genes; 
		 	 	 i+= NUM_OF_THREADS_PER_BLOCK) {
lvs's avatar
lvs committed
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837


				#if defined (DEBUG_MINIMIZER)
				//printf("(%-3u) %-15.7f %-10.7f %-10.7f %-10.7f\n", i, stepsize, genotype[i], gradient[i], candidate_genotype[i]);

				if (i == 0) {
					printf("\n%s\n", "Energy improved! ... then update genotype and energy:");
					printf("%13s %13s %5s %15s\n", "gene_id", "old.gene", "|", "new.gene");
				}
				printf("%13u %13.6f %5s %15.6f\n", i, genotype[i], "|", candidate_genotype[i]);

				if (i == 0) {
					printf("\n");
					printf("%13s %5s %15s\n", "old.energy", "|", "new.energy");
					printf("%13.6f %5s %15.6f\n", energy, "|", candidate_energy);
				}	
				#endif				

Leonardo Solis's avatar
Leonardo Solis committed
838
				if (i == 0) {
lvs's avatar
lvs committed
839
840
841
842
843
				
					#if defined (DEBUG_MINIMIZER)
					printf("\n%s\n", "Energy improved! ... then increase step_size:");
					#endif

Leonardo Solis's avatar
Leonardo Solis committed
844
845
					// Updating energy
					energy = candidate_energy;
846

Leonardo Solis's avatar
Leonardo Solis committed
847
848
					// Increase stepsize
					stepsize *= STEP_INCREASE;
849
				}
Leonardo Solis's avatar
Leonardo Solis committed
850

Leonardo Solis's avatar
Leonardo Solis committed
851
852
				// Updating genotype
				genotype[i] = candidate_genotype[i];
853
			}
Leonardo Solis's avatar
Leonardo Solis committed
854
855
		}
		else { 
lvs's avatar
lvs committed
856
857
858
859
860
861
			#if defined (DEBUG_MINIMIZER)
			if (get_local_id(0) == 0) {
				printf("\n%s\n", "NO Energy improvement! ... then decrease stepsize:");
			}
			#endif		

Leonardo Solis's avatar
Leonardo Solis committed
862
863
864
865
			if (get_local_id(0) == 0) {
				stepsize *= STEP_DECREASE;
			}
		}
866

867
		barrier(CLK_LOCAL_MEM_FENCE);
868

lvs's avatar
lvs committed
869
870
871
872
873
874
875
		#if defined (DEBUG_MINIMIZER)
		if (get_local_id(0) == 0) {
			printf("\n");
			printf("%20s %10.6f\n\n", "stepsize: ", stepsize);
		}
		#endif

Leonardo Solis's avatar
Leonardo Solis committed
876
		// Updating number of stepest-descent iterations (energy evaluations)
877
		if (get_local_id(0) == 0) {
Leonardo Solis's avatar
Leonardo Solis committed
878
	    		iteration_cnt = iteration_cnt + 1;
879

lvs's avatar
lvs committed
880
881
			#if defined (DEBUG_MINIMIZER) || defined (PRINT_MINIMIZER_ENERGY_EVOLUTION)
			printf("# sd-iters: %-3u, stepsize: %10.10f, E: %10.6f\n", iteration_cnt, stepsize, energy);
882
			#endif
lvs's avatar
lvs committed
883

884
			#if defined (DEBUG_ENERGY_KERNEL5)
Leonardo Solis's avatar
Leonardo Solis committed
885
			printf("%-18s [%-5s]---{%-5s}   [%-10.7f]---{%-10.7f}\n", "-ENERGY-KERNEL5-", "GRIDS", "INTRA", partial_interE[0], partial_intraE[0]);
886
887
			#endif
		}
888

lvs's avatar
lvs committed
889
  	} while ((iteration_cnt < dockpars_max_num_of_iters) && (stepsize > 1E-8));
890

lvs's avatar
lvs committed
891
892
893
894
895
896
	#if defined (DEBUG_MINIMIZER) || defined (PRINT_MINIMIZER_ENERGY_EVOLUTION)
	if (get_local_id(0) == 0) {
		printf("Termination criteria: ( #sd-iters < %-3u ) && ( stepsize > %10.10f )\n", dockpars_max_num_of_iters, 1E-8);
	}
	#endif

897
	// -----------------------------------------------------------------------------
898

899
  	// Updating eval counter and energy
900
	if (get_local_id(0) == 0) {
Leonardo Solis's avatar
Leonardo Solis committed
901
		dockpars_evals_of_new_entities[run_id*dockpars_pop_size+entity_id] += iteration_cnt;
Leonardo Solis's avatar
Leonardo Solis committed
902
		dockpars_energies_next[run_id*dockpars_pop_size+entity_id] = energy;
903

lvs's avatar
lvs committed
904
		#if defined (DEBUG_MINIMIZER) || defined (PRINT_MINIMIZER_ENERGY_EVOLUTION)
905
		printf("-------> End of grad-min cycle, num of evals: %u, final energy: %.6f\n", iteration_cnt, energy);
906
		#endif
907
	}
908

909
910
911
912
	// Mapping torsion angles
	for (uint gene_counter = get_local_id(0);
	     	  gene_counter < dockpars_num_of_genes;
	          gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
913
		   if (gene_counter >= 3) {
Leonardo Solis's avatar
Leonardo Solis committed
914
			    map_angle(&(genotype[gene_counter]));
915
		   }
916
917
	}

918
	// Updating old offspring in population
919
920
921
	barrier(CLK_LOCAL_MEM_FENCE);

	async_work_group_copy(dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
Leonardo Solis's avatar
Leonardo Solis committed
922
			      genotype,
923
			      dockpars_num_of_genes, 0);
924
}