kernel_fire.cl 27.4 KB
Newer Older
lvs's avatar
lvs committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
// Gradient-based fire minimizer
// FIRE: (F)ast (I)nertial (R)elaxation (E)ngine
// https://www.math.uni-bielefeld.de/~gaehler/papers/fire.pdf
// https://doi.org/10.1103/PhysRevLett.97.170201
// Alternative to Solis-Wets / Steepest-Descent / AdaDelta

// These parameters differ from the original implementation:
//            - DT_INC is larger          [ larger increments of "dt" ]
//            - DT_DEC is closer to 1.0   [ smaller decrements of "dt" ]
//            - ALPHA_START is larger     [ less inertia ]
        
// As a result, this implementation is "less local" than the original.
// In other words, it is easier to exit the current local minima and
// jump to a nearby local minima.
lvs's avatar
lvs committed
15
16

// Fire parameters (TODO: to be moved to header file?)
lvs's avatar
lvs committed
17
18
19
20
21
22
23
24
#define SUCCESS_MIN		5			// N_min   = 5
#define DT_INC			1.2f			// f_inc   = 1.1
#define DT_DEC			0.8f			// f_dec   = 0.5
#define ALPHA_START 		0.2f			// a_start = 0.1
#define ALPHA_DEC		0.99f			// f_a     = 0.99

// Tunable parameters
// This one tuned by trial and error
lvs's avatar
lvs committed
25
26
27
#define DT_MAX      		10.0f
#define DT_MAX_DIV_THREE	(DT_MAX / 3.0f)

lvs's avatar
lvs committed
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
// New parameter
// Not in original implementation
// if "dt" becomes smaller than this value, stop optimization
#define DT_MIN			1e-6

// Enabling "DEBUG_ENERGY_FIRE" requires
// manually enabling "DEBUG_ENERGY_KERNEL" in calcenergy.cl
//#define DEBUG_ENERGY_FIRE
	//#define PRINT_FIRE_ENERGIES
	//#define PRINT_FIRE_GENES_AND_GRADS
	//#define PRINT_FIRE_ATOMIC_COORDS
	//#define PRINT_FIRE_PARAMETERS

// Enable DEBUG_FIRE_MINIMIZER for a seeing a detailed FIRE evolution
// If only PRINT_FIRE_MINIMIZER_ENERGY_EVOLUTION is enabled,
// then a only a simplified FIRE evolution will be shown
//#define DEBUG_FIRE_MINIMIZER
	//#define PRINT_FIRE_MINIMIZER_ENERGY_EVOLUTION
lvs's avatar
lvs committed
46

lvs's avatar
lvs committed
47
48
// Enable this for debugging FIRE from a defined initial genotype
//#define DEBUG_FIRE_INITIAL_2BRT
lvs's avatar
lvs committed
49
50
51

__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
gradient_minFire(	
lvs's avatar
lvs committed
52
53
54
55
56
57
			    char   dockpars_num_of_atoms,
			    char   dockpars_num_of_atypes,
			    int    dockpars_num_of_intraE_contributors,
			    char   dockpars_gridsize_x,
			    char   dockpars_gridsize_y,
			    char   dockpars_gridsize_z,
lvs's avatar
lvs committed
58
							    		// g1 = gridsize_x
lvs's avatar
lvs committed
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
			    uint   dockpars_gridsize_x_times_y, 	// g2 = gridsize_x * gridsize_y
			    uint   dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
			    float  dockpars_grid_spacing,
	 __global const     float* restrict dockpars_fgrids, 		// This is too large to be allocated in __constant 
			    int    dockpars_rotbondlist_length,
			    float  dockpars_coeff_elec,
			    float  dockpars_coeff_desolv,
	  __global          float* restrict dockpars_conformations_next,
	  __global          float* restrict dockpars_energies_next,
  	  __global 	    int*   restrict dockpars_evals_of_new_entities,
	  __global          uint*  restrict dockpars_prng_states,
			    int    dockpars_pop_size,
			    int    dockpars_num_of_genes,
			    float  dockpars_lsearch_rate,
			    uint   dockpars_num_of_lsentities,
			    uint   dockpars_max_num_of_iters,
			    float  dockpars_qasp,
			    float  dockpars_smooth,

	  __constant        kernelconstant_interintra* 	 kerconst_interintra,
	  __global const    kernelconstant_intracontrib* kerconst_intracontrib,
	  __constant        kernelconstant_intra*	 kerconst_intra,
	  __constant        kernelconstant_rotlist*   	 kerconst_rotlist,
	  __constant        kernelconstant_conform*	 kerconst_conform
83
			,
lvs's avatar
lvs committed
84
85
86
87
88
89
90
	  __constant int*   	  rotbonds_const,
	  __global   const int*   rotbonds_atoms_const,
	  __constant int*   	  num_rotating_atoms_per_rotbond_const
			,
	  __global   const float* angle_const,
	  __constant       float* dependence_on_theta_const,
	  __constant       float* dependence_on_rotangle_const
lvs's avatar
lvs committed
91
92
93
94
95
96
97
98
99
)
//The GPU global function performs gradient-based minimization on (some) entities of conformations_next.
//The number of OpenCL compute units (CU) which should be started equals to num_of_minEntities*num_of_runs.
//This way the first num_of_lsentities entity of each population will be subjected to local search
//(and each CU carries out the algorithm for one entity).
//Since the first entity is always the best one in the current population,
//it is always tested according to the ls probability, and if it not to be
//subjected to local search, the entity with ID num_of_lsentities is selected instead of the first one (with ID 0).
{
lvs's avatar
lvs committed
100
101
102


  	// -----------------------------------------------------------------------------
lvs's avatar
lvs committed
103
	// -----------------------------------------------------------------------------
lvs's avatar
lvs committed
104
105
	// -----------------------------------------------------------------------------

lvs's avatar
lvs committed
106
107
108
109
110
111
112
113
114
115
116
117
	// Determining entity, and its run, energy, and genotype
	__local int   entity_id;
	__local int   run_id;
  	__local float energy;
	__local float genotype[ACTUAL_GENOTYPE_LENGTH];

	// Iteration counter fot the minimizer
  	__local uint iteration_cnt;  	

	if (get_local_id(0) == 0)
	{
		// Choosing a random entity out of the entire population
lvs's avatar
lvs committed
118
		/*
lvs's avatar
lvs committed
119
120
121
		run_id = get_group_id(0);
		//entity_id = (uint)(dockpars_pop_size * gpu_randf(dockpars_prng_states));
		entity_id = 0;
lvs's avatar
lvs committed
122
123
		*/

lvs's avatar
lvs committed
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
		run_id = get_group_id(0) / dockpars_num_of_lsentities;
		entity_id = get_group_id(0) % dockpars_num_of_lsentities;

		// Since entity 0 is the best one due to elitism,
		// it should be subjected to random selection
		if (entity_id == 0) {
			// If entity 0 is not selected according to LS-rate,
			// choosing an other entity
			if (100.0f*gpu_randf(dockpars_prng_states) > dockpars_lsearch_rate) {
				entity_id = dockpars_num_of_lsentities;					
			}
		}
		
		energy = dockpars_energies_next[run_id*dockpars_pop_size+entity_id];

		// Initializing gradient-minimizer counters and flags
    		iteration_cnt  = 0;

lvs's avatar
lvs committed
142
143
144
145
146
147
148
149
150
151
		#if defined (DEBUG_FIRE_MINIMIZER) || defined (PRINT_FIRE_MINIMIZER_ENERGY_EVOLUTION)
		printf("\n");
		printf("-------> Start of FIRE minimization cycle\n");
		printf("%20s %6u\n", "run_id: ", run_id);
		printf("%20s %6u\n", "entity_id: ", entity_id);
		printf("\n");
		printf("%20s \n", "LGA genotype: ");
		printf("%20s %.6f\n", "initial energy: ", energy);
		#endif
	}
lvs's avatar
lvs committed
152
153
154
155
156
157
	barrier(CLK_LOCAL_MEM_FENCE);

  	event_t ev = async_work_group_copy(genotype,
  			      		   dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
                              		   dockpars_num_of_genes, 0);

158
	// Asynchronous copy should be finished by here
159
	wait_group_events(1, &ev);
160

lvs's avatar
lvs committed
161
  	// -----------------------------------------------------------------------------
lvs's avatar
lvs committed
162
163
	// -----------------------------------------------------------------------------
	// -----------------------------------------------------------------------------
lvs's avatar
lvs committed
164
165
166
	           
	// Partial results of the gradient step
	__local float gradient[ACTUAL_GENOTYPE_LENGTH];
lvs's avatar
lvs committed
167
168
169
170
171
172
173
174
175
176
	__local float candidate_gradient[ACTUAL_GENOTYPE_LENGTH];

	// Energy may go up, so we keep track of the best energy ever calculated.
	// Then, we return the genotype corresponding 
	// to the best observed energy, i.e. "best_genotype"
	__local float best_energy;
	__local float best_genotype[ACTUAL_GENOTYPE_LENGTH];

	__local float candidate_energy;
	__local float candidate_genotype[ACTUAL_GENOTYPE_LENGTH];
lvs's avatar
lvs committed
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203

	// -------------------------------------------------------------------
	// Calculate gradients (forces) for intermolecular energy
	// Derived from autodockdev/maps.py
	// -------------------------------------------------------------------
	// Gradient of the intermolecular energy per each ligand atom
	// Also used to store the accummulated gradient per each ligand atom
	__local float gradient_inter_x[MAX_NUM_OF_ATOMS];
	__local float gradient_inter_y[MAX_NUM_OF_ATOMS];
	__local float gradient_inter_z[MAX_NUM_OF_ATOMS];

	// Gradient of the intramolecular energy per each ligand atom
	__local float gradient_intra_x[MAX_NUM_OF_ATOMS];
	__local float gradient_intra_y[MAX_NUM_OF_ATOMS];
	__local float gradient_intra_z[MAX_NUM_OF_ATOMS];

	// Ligand-atom position and partial energies
	__local float calc_coords_x[MAX_NUM_OF_ATOMS];
	__local float calc_coords_y[MAX_NUM_OF_ATOMS];
	__local float calc_coords_z[MAX_NUM_OF_ATOMS];
	__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];

	#if defined (DEBUG_ENERGY_KERNEL)
	__local float partial_interE[NUM_OF_THREADS_PER_BLOCK];
	__local float partial_intraE[NUM_OF_THREADS_PER_BLOCK];
	#endif

lvs's avatar
lvs committed
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
	// Enable this for debugging FIRE from a defined initial genotype
	#if defined (DEBUG_FIRE_INITIAL_2BRT)
	if (get_local_id(0) == 0) {
		// 2brt
		genotype[0]  = 24.093334;
		genotype[1]  = 24.658667;
		genotype[2]  = 24.210667;
		genotype[3]  = 50.0;
		genotype[4]  = 50.0;
		genotype[5]  = 50.0;
		genotype[6]  = 0.0f;
		genotype[7]  = 0.0f;
		genotype[8]  = 0.0f;
		genotype[9]  = 0.0f;
		genotype[10] = 0.0f;
		genotype[11] = 0.0f;
		genotype[12] = 0.0f;
		genotype[13] = 0.0f;
		genotype[14] = 0.0f;
		genotype[15] = 0.0f;
		genotype[16] = 0.0f;
		genotype[17] = 0.0f;
		genotype[18] = 0.0f;
		genotype[19] = 0.0f;
		genotype[20] = 0.0f;
	}
	// Evaluating candidate
	barrier(CLK_LOCAL_MEM_FENCE);
lvs's avatar
lvs committed
232

lvs's avatar
lvs committed
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
	// =============================================================
	gpu_calc_energy(dockpars_rotbondlist_length,
			dockpars_num_of_atoms,
			dockpars_gridsize_x,
			dockpars_gridsize_y,
			dockpars_gridsize_z,
							    	// g1 = gridsize_x
			dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
			dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
			dockpars_fgrids,
			dockpars_num_of_atypes,
			dockpars_num_of_intraE_contributors,
			dockpars_grid_spacing,
			dockpars_coeff_elec,
			dockpars_qasp,
			dockpars_coeff_desolv,
			dockpars_smooth,
lvs's avatar
lvs committed
250

lvs's avatar
lvs committed
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
			genotype, /*WARNING: calculating the energy of the hardcoded genotype*/
			&energy,
			&run_id,
			// Some OpenCL compilers don't allow declaring 
			// local variables within non-kernel functions.
			// These local variables must be declared in a kernel, 
			// and then passed to non-kernel functions.
			calc_coords_x,
			calc_coords_y,
			calc_coords_z,
			partial_energies,
			#if defined (DEBUG_ENERGY_KERNEL)
			partial_interE,
			partial_intraE,
			#endif
#if 0
			true,
#endif
			kerconst_interintra,
			kerconst_intracontrib,
			kerconst_intra,
			kerconst_rotlist,
			kerconst_conform			
			);
	// =============================================================
lvs's avatar
lvs committed
276

lvs's avatar
lvs committed
277
278
279
280
281
282
283
284
	// WARNING: hardcoded has priority over LGA genotype.
	// That means, if DEBUG_INITIAL_2BRT is defined, then
	// LGA genotype is not used (only for debugging purposes)
	if (get_local_id(0) == 0)
	{
		printf("\n");
		printf("%20s \n", "hardcoded genotype: ");
		printf("%20s %.6f\n", "initial energy: ", energy);		
lvs's avatar
lvs committed
285
	}
lvs's avatar
lvs committed
286
287
	barrier(CLK_LOCAL_MEM_FENCE);
	#endif
lvs's avatar
lvs committed
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308

	// Calculating gradient
	barrier(CLK_LOCAL_MEM_FENCE);

	// =============================================================
	gpu_calc_gradient(
			dockpars_rotbondlist_length,
			dockpars_num_of_atoms,
			dockpars_gridsize_x,
			dockpars_gridsize_y,
			dockpars_gridsize_z,
							    	// g1 = gridsize_x
			dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
			dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
			dockpars_fgrids,
			dockpars_num_of_atypes,
			dockpars_num_of_intraE_contributors,
			dockpars_grid_spacing,
			dockpars_coeff_elec,
			dockpars_qasp,
			dockpars_coeff_desolv,
lvs's avatar
lvs committed
309
310
			dockpars_smooth,

lvs's avatar
lvs committed
311
312
313
314
315
316
317
318
319
320
321
322
			// Some OpenCL compilers don't allow declaring 
			// local variables within non-kernel functions.
			// These local variables must be declared in a kernel, 
			// and then passed to non-kernel functions.
			genotype,
			&energy,
			&run_id,

			calc_coords_x,
			calc_coords_y,
			calc_coords_z,

lvs's avatar
lvs committed
323
324
325
326
327
328
			kerconst_interintra,
			kerconst_intracontrib,
			kerconst_intra,
			kerconst_rotlist,
			kerconst_conform
			,
lvs's avatar
lvs committed
329
330
331
			rotbonds_const,
			rotbonds_atoms_const,
			num_rotating_atoms_per_rotbond_const
332
333
334
335
			,
	     		angle_const,
	     		dependence_on_theta_const,
	     		dependence_on_rotangle_const
lvs's avatar
lvs committed
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
		 	// Gradient-related arguments
		 	// Calculate gradients (forces) for intermolecular energy
		 	// Derived from autodockdev/maps.py
			,
			dockpars_num_of_genes,
			gradient_inter_x,
			gradient_inter_y,
			gradient_inter_z,
			gradient_intra_x,
			gradient_intra_y,
			gradient_intra_z,
			gradient
			);
	// =============================================================

351
352
353
	// FIRE counters
	__local float velocity [ACTUAL_GENOTYPE_LENGTH];// velocity
	__local float alpha;				// alpha
lvs's avatar
lvs committed
354
	__local uint  count_success;			// count_success
355
356
	__local float dt;				// "dt"

lvs's avatar
lvs committed
357
358
	// Calculating the gradient/velocity norm
	__local float gradient_tmp [ACTUAL_GENOTYPE_LENGTH];
359
	__local float inv_gradient_norm;
lvs's avatar
lvs committed
360
361
362
363
	__local float velocity_tmp [ACTUAL_GENOTYPE_LENGTH];
	__local float velocity_norm;
	__local float velnorm_div_gradnorm;

364
	// Defining FIRE power
lvs's avatar
lvs committed
365
366
367
	__local float power_tmp [ACTUAL_GENOTYPE_LENGTH];
	__local float power;

lvs's avatar
lvs committed
368
	// Calculating gradient-norm components
lvs's avatar
lvs committed
369
370
371
372
373
374
375
376
	for (uint gene_counter = get_local_id(0);
	          gene_counter < dockpars_num_of_genes;
	          gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
		 gradient_tmp [gene_counter] = gradient [gene_counter] * gradient [gene_counter];
	}
	barrier(CLK_LOCAL_MEM_FENCE);

	if (get_local_id(0) == 0) {
lvs's avatar
lvs committed
377
		// nitializing
lvs's avatar
lvs committed
378
379
380
381
		alpha         = ALPHA_START;
		count_success = 0;
		dt            = DT_MAX_DIV_THREE;

lvs's avatar
lvs committed
382
383
384
		// Initializing "gradient norm" to 0.0f,
		// but stored it in inv_gradient_norm
		inv_gradient_norm = 0.0f;
lvs's avatar
lvs committed
385
		
lvs's avatar
lvs committed
386
		// Summing up squares to continue calculation of "gradient-norm"
lvs's avatar
lvs committed
387
		for (uint i = 0; i < dockpars_num_of_genes; i++) {
lvs's avatar
lvs committed
388
			inv_gradient_norm += gradient_tmp [i];
lvs's avatar
lvs committed
389
390
		}
		
lvs's avatar
lvs committed
391
392
393
		// Note: ALPHA_START is included as a factor here
		inv_gradient_norm = native_sqrt( inv_gradient_norm);
		inv_gradient_norm = ALPHA_START * native_recip(inv_gradient_norm);
lvs's avatar
lvs committed
394
395
396
397
	}
	barrier(CLK_LOCAL_MEM_FENCE);

	// Starting velocity
lvs's avatar
lvs committed
398
	// This equation was found by trial and error
lvs's avatar
lvs committed
399
400
401
	for (uint gene_counter = get_local_id(0);
	          gene_counter < dockpars_num_of_genes;
	          gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
lvs's avatar
lvs committed
402
		 velocity [gene_counter] = - gradient [gene_counter] * inv_gradient_norm;
lvs's avatar
lvs committed
403
404
	}
	barrier(CLK_LOCAL_MEM_FENCE);
lvs's avatar
lvs committed
405

lvs's avatar
lvs committed
406
407
408
409
410
411
412
413
	// Keeping track of best genotype which 
	// may or may not be the last genotype
	if (get_local_id(0) == 0) {
		best_energy = energy;

		for (uint i = 0; i < dockpars_num_of_genes; i++) {
			best_genotype [i] = genotype [i];
		}
lvs's avatar
lvs committed
414
	}
lvs's avatar
lvs committed
415
416
417
	barrier(CLK_LOCAL_MEM_FENCE);

	// Perform fire iterations
lvs's avatar
lvs committed
418
419
420

	// The termination criteria is based on 
	// a maximum number of iterations, and
lvs's avatar
lvs committed
421
	// value of "dt" (fire specific)
lvs's avatar
lvs committed
422
	do {
lvs's avatar
lvs committed
423
424
		// Printing number of FIRE iterations
		#if defined (DEBUG_FIRE_MINIMIZER) 
lvs's avatar
lvs committed
425
		if (get_local_id(0) == 0) {
lvs's avatar
lvs committed
426
			printf("%s\n", "----------------------------------------------------------");	
lvs's avatar
lvs committed
427
		}
lvs's avatar
lvs committed
428
		#endif
lvs's avatar
lvs committed
429
		
lvs's avatar
lvs committed
430
431
432
		#if defined (DEBUG_FIRE_MINIMIZER) || defined (PRINT_FIRE_MINIMIZER_ENERGY_EVOLUTION)
		if (get_local_id(0) == 0) {
			printf("%-15s %-3u ", "# FIRE iteration: ", iteration_cnt);
lvs's avatar
lvs committed
433
		}
lvs's avatar
lvs committed
434
		#endif
lvs's avatar
lvs committed
435

lvs's avatar
lvs committed
436
		// Creating new (candidate) genotypes
lvs's avatar
lvs committed
437
438
439
		for (uint gene_counter = get_local_id(0);
	        	  gene_counter < dockpars_num_of_genes;
	        	  gene_counter+= NUM_OF_THREADS_PER_BLOCK) {			
lvs's avatar
lvs committed
440
			candidate_genotype [gene_counter] = genotype [gene_counter] + dt * velocity [gene_counter];	
lvs's avatar
lvs committed
441
442
		}

lvs's avatar
lvs committed
443
444
		// Calculating (candidate) gradient
		// from "candidate_genotype"
lvs's avatar
lvs committed
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
		barrier(CLK_LOCAL_MEM_FENCE);

		// =============================================================
		gpu_calc_gradient(
				dockpars_rotbondlist_length,
				dockpars_num_of_atoms,
				dockpars_gridsize_x,
				dockpars_gridsize_y,
				dockpars_gridsize_z,
								    	// g1 = gridsize_x
				dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
				dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
				dockpars_fgrids,
				dockpars_num_of_atypes,
				dockpars_num_of_intraE_contributors,
				dockpars_grid_spacing,
				dockpars_coeff_elec,
				dockpars_qasp,
				dockpars_coeff_desolv,
lvs's avatar
lvs committed
464
465
				dockpars_smooth,

lvs's avatar
lvs committed
466
467
468
469
				// Some OpenCL compilers don't allow declaring 
				// local variables within non-kernel functions.
				// These local variables must be declared in a kernel, 
				// and then passed to non-kernel functions.
lvs's avatar
lvs committed
470
471
				candidate_genotype,
				&candidate_energy,
lvs's avatar
lvs committed
472
473
474
475
476
477
				&run_id,

				calc_coords_x,
				calc_coords_y,
				calc_coords_z,

lvs's avatar
lvs committed
478
479
480
481
482
483
				kerconst_interintra,
				kerconst_intracontrib,
				kerconst_intra,
				kerconst_rotlist,
				kerconst_conform
				,
lvs's avatar
lvs committed
484
485
486
				rotbonds_const,
				rotbonds_atoms_const,
				num_rotating_atoms_per_rotbond_const
487
488
489
490
				,
	     			angle_const,
	     			dependence_on_theta_const,
	     			dependence_on_rotangle_const
lvs's avatar
lvs committed
491
492
493
494
495
496
497
498
499
500
501
			 	// Gradient-related arguments
			 	// Calculate gradients (forces) for intermolecular energy
			 	// Derived from autodockdev/maps.py
				,
				dockpars_num_of_genes,
				gradient_inter_x,
				gradient_inter_y,
				gradient_inter_z,
				gradient_intra_x,
				gradient_intra_y,
				gradient_intra_z,
lvs's avatar
lvs committed
502
				candidate_gradient
lvs's avatar
lvs committed
503
504
505
				);
		// =============================================================

lvs's avatar
lvs committed
506
507
		// Evaluating (candidate) genotype
		// i.e. get (candidate) energy
lvs's avatar
lvs committed
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
		barrier(CLK_LOCAL_MEM_FENCE);

		// =============================================================
		gpu_calc_energy(dockpars_rotbondlist_length,
				dockpars_num_of_atoms,
				dockpars_gridsize_x,
				dockpars_gridsize_y,
				dockpars_gridsize_z,
								    	// g1 = gridsize_x
				dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
				dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
				dockpars_fgrids,
				dockpars_num_of_atypes,
				dockpars_num_of_intraE_contributors,
				dockpars_grid_spacing,
				dockpars_coeff_elec,
				dockpars_qasp,
				dockpars_coeff_desolv,
lvs's avatar
lvs committed
526
527
				dockpars_smooth,

lvs's avatar
lvs committed
528
529
				candidate_genotype,
				&candidate_energy,
lvs's avatar
lvs committed
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
				&run_id,
				// Some OpenCL compilers don't allow declaring 
				// local variables within non-kernel functions.
				// These local variables must be declared in a kernel, 
				// and then passed to non-kernel functions.
				calc_coords_x,
				calc_coords_y,
				calc_coords_z,
				partial_energies,
				#if defined (DEBUG_ENERGY_KERNEL)
				partial_interE,
				partial_intraE,
				#endif
#if 0
				true,
#endif
lvs's avatar
lvs committed
546
547
548
549
550
				kerconst_interintra,
				kerconst_intracontrib,
				kerconst_intra,
				kerconst_rotlist,
				kerconst_conform
lvs's avatar
lvs committed
551
552
553
				);
		// =============================================================

lvs's avatar
lvs committed
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
		// Calculating power
        	// power is force * velocity.
        	// force = -gradient
		barrier(CLK_LOCAL_MEM_FENCE);

		for (uint gene_counter = get_local_id(0);
	        	  gene_counter < dockpars_num_of_genes;
	        	  gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
			
			// Calculating power
			power_tmp [gene_counter] = -candidate_gradient [gene_counter] * velocity [gene_counter];
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		if (get_local_id(0) == 0) {
			power = 0.0f;

			// Summing dot products
			for (uint i = 0; i < dockpars_num_of_genes; i++) {
				power += power_tmp [i];
			}
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		#if defined (DEBUG_ENERGY_FIRE)
		if (/*(get_group_id(0) == 0) &&*/ (get_local_id(0) == 0)) {
			#if defined (PRINT_FIRE_ENERGIES)
			printf("\n");
			printf("%-10s %-10.6f \n", "intra: ",  partial_intraE[0]);
			printf("%-10s %-10.6f \n", "grids: ",  partial_interE[0]);
			printf("%-10s %-10.6f \n", "Energy: ", (partial_intraE[0] + partial_interE[0]));
			#endif

			printf("\n");
			printf("%-15s %-10.6f \n", "energy: "     ,  energy);
			printf("%-15s %-10.6f \n", "best_energy: ",  best_energy);

			printf("\n%-15s %-10.6f \n","dt: "           ,  dt);
			printf("%-15s %-10.6f \n",  "power: "        ,  power);
			printf("%-15s %-10.6f \n",  "alpha: "        ,  alpha);
			printf("%-15s %-10u \n\n",   "count_success: " ,  count_success);

			#if defined (PRINT_FIRE_GENES_AND_GRADS)
lvs's avatar
lvs committed
597
			for(uint i = 0; i < dockpars_num_of_genes; i++) {
lvs's avatar
lvs committed
598
599
600
601
602
				if (i == 0) {
					//printf("\n%s\n", "----------------------------------------------------------");
					printf("%13s %13s %5s %15s %21s %15s\n", "gene_id", "genotype", "|", "gradient", "(autodockdevpy units)", "velocity");
				}
				printf("%13u %13.6f %5s %15.6f %21.6f %15.6f\n", i, genotype[i], "|", gradient[i], (i<3)? (gradient[i]/0.375f):(gradient[i]*180.0f/PI_FLOAT), velocity[i]);
lvs's avatar
lvs committed
603
			}
lvs's avatar
lvs committed
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625

			for(uint i = 0; i < dockpars_num_of_genes; i++) {
				if (i == 0) {
					//printf("\n%s\n", "----------------------------------------------------------");
					printf("\n");
					printf("%13s %13s %5s %15s\n", "gene_id", "cand_genotype", "|", "cand_gradient");
				}
				printf("%13u %13.6f %5s %15.6f\n", i, candidate_genotype[i], "|", candidate_gradient[i]);
			}
			#endif

			#if defined (PRINT_FIRE_ATOMIC_COORDS)
			for(uint i = 0; i < dockpars_num_of_atoms; i++) {
				if (i == 0) {
					printf("\n%s\n", "----------------------------------------------------------");
					printf("%s\n", "Coordinates calculated by calcenergy.cl");
					printf("%12s %12s %12s %12s\n", "atom_id", "coords.x", "coords.y", "coords.z");
				}
				printf("%12u %12.6f %12.6f %12.6f\n", i, calc_coords_x[i], calc_coords_y[i], calc_coords_z[i]);
			}
			printf("\n");
			#endif
lvs's avatar
lvs committed
626
		}
lvs's avatar
lvs committed
627
		barrier(CLK_LOCAL_MEM_FENCE);
lvs's avatar
lvs committed
628
629
		#endif

lvs's avatar
lvs committed
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
		// Going uphill (against the gradient)
		if (power < 0.0f) {

			// Using same equation as for starting velocity

			for (uint gene_counter = get_local_id(0);
				  gene_counter < dockpars_num_of_genes;
				  gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
				// Calculating gradient-norm
				gradient_tmp [gene_counter] = gradient [gene_counter] * gradient [gene_counter];
			}
			barrier(CLK_LOCAL_MEM_FENCE);

			if (get_local_id(0) == 0) {
				inv_gradient_norm = 0.0f;

				// Summing dot products
				for (uint i = 0; i < dockpars_num_of_genes; i++) {
					inv_gradient_norm += gradient_tmp [i];
				}

				// Note: ALPHA is included as a factor here
				inv_gradient_norm = native_sqrt(inv_gradient_norm);
				inv_gradient_norm = ALPHA_START * native_recip(inv_gradient_norm);
			}
			barrier(CLK_LOCAL_MEM_FENCE);

			for (uint gene_counter = get_local_id(0);
		        	  gene_counter < dockpars_num_of_genes;
		        	  gene_counter+= NUM_OF_THREADS_PER_BLOCK) {		
				velocity [gene_counter] = - gradient [gene_counter] * inv_gradient_norm;	
			}
			barrier(CLK_LOCAL_MEM_FENCE);

		 	if (get_local_id(0) == 0) {
				count_success = 0;
				alpha         = ALPHA_START;
				dt 	      = dt * DT_DEC; 

				#if defined PRINT_FIRE_PARAMETERS
				printf("\nPower is negative :( = %f\n", power);
				printf("\n %15s %10.7f\n %15s %10.7f\n", "alpha: ", alpha,  "dt: ", dt);
				#endif
			}
		}
		// Going downhill
		else {
			if (get_local_id(0) == 0) {
				count_success ++;

				#if defined PRINT_FIRE_PARAMETERS
				printf("\nPower is positive :) = %f\n", power);
				printf("\n %15s %10.7f\n %15s %10.7f\n", "old alpha: ", alpha, "old dt: ", dt);
				#endif

				// Reaching minimum number of consecutive successful steps (power >= 0)
				if (count_success > SUCCESS_MIN) {
					dt    = fmin (dt * DT_INC, DT_MAX);	// increase dt
					alpha = alpha * ALPHA_DEC; 		// decrease alpha

					#if defined PRINT_FIRE_PARAMETERS
					printf("\n count_success > %u\n", SUCCESS_MIN);
					printf("\n %10s %7.7f\n %10s %7.7f\n", "new alpha: ", alpha, "new dt: ", dt);
					#endif
				}
				else {
					#if defined PRINT_FIRE_PARAMETERS
					printf("\n count_success <= %u, do NOT update alpha or dt\n", SUCCESS_MIN);
					#endif
				}
			}
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		// --------------------------------------
		// Always update: energy, genotype, gradient
		for (uint gene_counter = get_local_id(0);
	        	  gene_counter < dockpars_num_of_genes;
	        	  gene_counter+= NUM_OF_THREADS_PER_BLOCK) {

			if (gene_counter == 0) {
				energy = candidate_energy;		
			}
	
			genotype [gene_counter] = candidate_genotype [gene_counter];
			gradient [gene_counter] = candidate_gradient [gene_counter];	
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		// --------------------------------------
		// Calculating gradient-norm
		for (uint gene_counter = get_local_id(0);
			  gene_counter < dockpars_num_of_genes;
			  gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
			gradient_tmp [gene_counter] = gradient [gene_counter] * gradient [gene_counter];
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		if (get_local_id(0) == 0) {
			inv_gradient_norm = 0.0f;

			// Summing dot products
			for (uint i = 0; i < dockpars_num_of_genes; i++) {
				inv_gradient_norm += gradient_tmp [i];
			}

			inv_gradient_norm = native_sqrt(inv_gradient_norm);
			inv_gradient_norm = native_recip(inv_gradient_norm);
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		// --------------------------------------
		// Calculating velocity-norm
		for (uint gene_counter = get_local_id(0);
			  gene_counter < dockpars_num_of_genes;
			  gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
			velocity_tmp [gene_counter] = velocity [gene_counter] * velocity [gene_counter];
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		if (get_local_id(0) == 0) {
			velocity_norm  = 0.0f;

			// Summing dot products
			for (uint i = 0; i < dockpars_num_of_genes; i++) {
				velocity_norm += velocity_tmp [i];
			}

			// Note: alpha is included as a factor here
			velocity_norm = native_sqrt(velocity_norm);
			velnorm_div_gradnorm = alpha * velocity_norm * inv_gradient_norm;
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		// --------------------------------------
		// Calculating velocity
		for (uint gene_counter = get_local_id(0);
			  gene_counter < dockpars_num_of_genes;
			  gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
			// NOTE: "velnorm_div_gradnorm" includes already the "alpha" factor
			velocity [gene_counter] = (1 - alpha) * velocity [gene_counter] - gradient [gene_counter] * velnorm_div_gradnorm;
		}
		barrier(CLK_LOCAL_MEM_FENCE);

		// --------------------------------------
lvs's avatar
lvs committed
775
776
777
778
		// Updating number of fire iterations (energy evaluations)
		if (get_local_id(0) == 0) {
	    		iteration_cnt = iteration_cnt + 1;

lvs's avatar
lvs committed
779
780
			#if defined (DEBUG_FIRE_MINIMIZER) || defined (PRINT_FIRE_MINIMIZER_ENERGY_EVOLUTION)
			printf("%20s %10.6f\n", "new.energy: ", energy);
lvs's avatar
lvs committed
781
782
			#endif

lvs's avatar
lvs committed
783
784
			#if defined (DEBUG_ENERGY_KERNEL6)
			printf("%-18s [%-5s]---{%-5s}   [%-10.7f]---{%-10.7f}\n", "-ENERGY-KERNEL6-", "GRIDS", "INTRA", partial_interE[0], partial_intraE[0]);
lvs's avatar
lvs committed
785
			#endif
lvs's avatar
lvs committed
786
787
788
789
790
791
792
793

			if (energy <  best_energy) {
				best_energy = energy;

				for(uint i = 0; i < dockpars_num_of_genes; i++) { 
					best_genotype[i] = genotype[i];
				}
			}
lvs's avatar
lvs committed
794
		}
lvs's avatar
lvs committed
795
		barrier(CLK_LOCAL_MEM_FENCE);
lvs's avatar
lvs committed
796

lvs's avatar
lvs committed
797
	} while ((iteration_cnt < dockpars_max_num_of_iters) && (dt > DT_MIN));
lvs's avatar
lvs committed
798
799
  	// -----------------------------------------------------------------------------
	// -----------------------------------------------------------------------------
lvs's avatar
lvs committed
800
801
802
803
804
	// -----------------------------------------------------------------------------

  	// Updating eval counter and energy
	if (get_local_id(0) == 0) {
		dockpars_evals_of_new_entities[run_id*dockpars_pop_size+entity_id] += iteration_cnt;
lvs's avatar
lvs committed
805
		dockpars_energies_next[run_id*dockpars_pop_size+entity_id] = best_energy;
lvs's avatar
lvs committed
806

lvs's avatar
lvs committed
807
808
809
810
		#if defined (DEBUG_FIRE_MINIMIZER) || defined (PRINT_FIRE_MINIMIZER_ENERGY_EVOLUTION)
		printf("\n");
		printf("Termination criteria: ( dt >= %10.10f ) OR ( #fire-iters >= %-3u )\n", DT_MIN, dockpars_max_num_of_iters);
		printf("-------> End of FIRE minimization cycle, num of energy evals: %u, final energy: %.6f\n", iteration_cnt, best_energy);
lvs's avatar
lvs committed
811
812
813
814
815
816
817
		#endif
	}

	// Mapping torsion angles
	for (uint gene_counter = get_local_id(0);
	     	  gene_counter < dockpars_num_of_genes;
	          gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
818
		   if (gene_counter >= 3) {
lvs's avatar
lvs committed
819
			    map_angle(&(best_genotype[gene_counter]));
lvs's avatar
lvs committed
820
821
822
823
824
825
		   }
	}

	// Updating old offspring in population
	barrier(CLK_LOCAL_MEM_FENCE);

826
	event_t ev2 = async_work_group_copy(dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
lvs's avatar
lvs committed
827
			                    best_genotype,
828
829
830
831
			                    dockpars_num_of_genes, 0);

	// Asynchronous copy should be finished by here
	wait_group_events(1, &ev2);
lvs's avatar
lvs committed
832
}