// kernel3.cl
/*

OCLADock, an OpenCL implementation of AutoDock 4.2 running a Lamarckian Genetic Algorithm
Copyright (C) 2017 TU Darmstadt, Embedded Systems and Applications Group, Germany. All rights reserved.

AutoDock is a Trade Mark of the Scripps Research Institute.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

*/


// Local-search kernel (perform_LS)
// perform_LS: local search on selected entities of dockpars_conformations_next.
//
// Launch layout (per the original author's notes): the number of work-groups
// started should equal num_of_lsentities * num_of_runs. This way the first
// num_of_lsentities entities of each population are subjected to local search,
// and each work-group carries out the algorithm for one entity.
// Since the first entity is always the best one in the current population,
// it is tested against the LS probability; if it is not selected, the entity
// with ID num_of_lsentities is processed instead of the one with ID 0.
//
// Per work-group outputs:
//   - dockpars_conformations_next : genotype of the processed entity is overwritten
//   - dockpars_energies_next      : energy of the processed entity is overwritten
//   - dockpars_evals_of_new_entities : incremented by the number of energy evaluations
//
// All remaining arguments are forwarded unchanged to gpu_calc_energy().
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
perform_LS(
			char   dockpars_num_of_atoms,
			char   dockpars_num_of_atypes,
			int    dockpars_num_of_intraE_contributors,
			char   dockpars_gridsize_x,			// g1 = gridsize_x
			char   dockpars_gridsize_y,
			char   dockpars_gridsize_z,
			uint   dockpars_gridsize_x_times_y,		// g2 = gridsize_x * gridsize_y
			uint   dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
			float  dockpars_grid_spacing,
	 __global const float* restrict dockpars_fgrids,	// This is too large to be allocated in __constant
			int    dockpars_rotbondlist_length,
			float  dockpars_coeff_elec,
			float  dockpars_coeff_desolv,
	 __global       float* restrict dockpars_conformations_next,
	 __global       float* restrict dockpars_energies_next,
	 __global       int*   restrict dockpars_evals_of_new_entities,
	 __global       uint*  restrict dockpars_prng_states,
			int    dockpars_pop_size,
			int    dockpars_num_of_genes,
			float  dockpars_lsearch_rate,
			uint   dockpars_num_of_lsentities,
			float  dockpars_rho_lower_bound,
			float  dockpars_base_dmov_mul_sqrt3,
			float  dockpars_base_dang_mul_sqrt3,
			uint   dockpars_cons_limit,
			uint   dockpars_max_num_of_iters,
			float  dockpars_qasp,
	 __constant float* atom_charges_const,
	 __constant char*  atom_types_const,
	 __constant char*  intraE_contributors_const,
	 __constant float* VWpars_AC_const,
	 __constant float* VWpars_BD_const,
	 __constant float* dspars_S_const,
	 __constant float* dspars_V_const,
	 __constant int*   rotlist_const,
	 __constant float* ref_coords_x_const,
	 __constant float* ref_coords_y_const,
	 __constant float* ref_coords_z_const,
	 __constant float* rotbonds_moving_vectors_const,
	 __constant float* rotbonds_unit_vectors_const,
	 __constant float* ref_orientation_quats_const
)
{
	// Some OpenCL compilers don't allow declaring
	// local variables within non-kernel functions.
	// These local variables must be declared in a kernel,
	// and then passed to non-kernel functions.
	__local float genotype_candidate[ACTUAL_GENOTYPE_LENGTH];
	__local float genotype_deviate  [ACTUAL_GENOTYPE_LENGTH];
	__local float genotype_bias     [ACTUAL_GENOTYPE_LENGTH];
	__local float rho;
	__local int   cons_succ;
	__local int   cons_fail;
	__local int   iteration_cnt;
	__local float candidate_energy;
	__local int   evaluation_cnt;
	int gene_counter;

	__local float offspring_genotype[ACTUAL_GENOTYPE_LENGTH];
	__local int run_id;
	__local int entity_id;
	__local float offspring_energy;

	__local float calc_coords_x[MAX_NUM_OF_ATOMS];
	__local float calc_coords_y[MAX_NUM_OF_ATOMS];
	__local float calc_coords_z[MAX_NUM_OF_ATOMS];
	__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];

	#if defined (DEBUG_ENERGY_KERNEL3)
	__local float partial_interE [NUM_OF_THREADS_PER_BLOCK];
	__local float partial_intraE [NUM_OF_THREADS_PER_BLOCK];
	#endif

	// Determining run ID and entity ID
	// Initializing offspring genotype
	if (get_local_id(0) == 0)
	{
		run_id = get_group_id(0) / dockpars_num_of_lsentities;
		entity_id = get_group_id(0) % dockpars_num_of_lsentities;

		// Since entity 0 is the best one due to elitism,
		// it should be subjected to random selection
		if (entity_id == 0) {
			// If entity 0 is not selected according to LS-rate,
			// choosing another entity.
			// NOTE(review): the 100.0f factor suggests dockpars_lsearch_rate
			// is a percentage and gpu_randf() returns a value in [0,1] -- confirm.
			if (100.0f*gpu_randf(dockpars_prng_states) > dockpars_lsearch_rate) {
				entity_id = dockpars_num_of_lsentities;
			}
		}

		offspring_energy = dockpars_energies_next[run_id*dockpars_pop_size+entity_id];
	}

	// run_id / entity_id / offspring_energy must be visible to every work-item
	barrier(CLK_LOCAL_MEM_FENCE);

	// Start fetching the entity's genotype into local memory.
	// The returned event is kept so that completion can be enforced below.
	event_t ev_load = async_work_group_copy(offspring_genotype,
			      dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
			      dockpars_num_of_genes, 0);

	// Independent work that can overlap with the copy: clearing the bias
	for (gene_counter = get_local_id(0);
	     gene_counter < dockpars_num_of_genes;
	     gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
		genotype_bias[gene_counter] = 0.0f;
	}

	if (get_local_id(0) == 0) {
		rho = 1.0f;
		cons_succ = 0;
		cons_fail = 0;
		iteration_cnt = 0;
		evaluation_cnt = 0;
	}

	// The asynchronous copy is only guaranteed to be complete after
	// wait_group_events(); a barrier alone does not provide that guarantee.
	wait_group_events(1, &ev_load);

	barrier(CLK_LOCAL_MEM_FENCE);

	while ((iteration_cnt < dockpars_max_num_of_iters) && (rho > dockpars_rho_lower_bound))
	{
		// New random deviate
		for (gene_counter = get_local_id(0);
		     gene_counter < dockpars_num_of_genes;
		     gene_counter+= NUM_OF_THREADS_PER_BLOCK)
		{
			genotype_deviate[gene_counter] = rho*(2*gpu_randf(dockpars_prng_states)-1);

			// Translation genes
			if (gene_counter <= 2) {
				genotype_deviate[gene_counter] *= dockpars_base_dmov_mul_sqrt3;
			}
			// Shoemake orientation-genes (3..5) do not use initial deviation
			// Torsion genes
			else if (gene_counter >= 6) {
				genotype_deviate[gene_counter] *= dockpars_base_dang_mul_sqrt3;
			}
		}

		// Generating new genotype candidate.
		// Each work-item only touches the gene indices it wrote above,
		// so no barrier is required between the two loops.
		for (gene_counter = get_local_id(0);
		     gene_counter < dockpars_num_of_genes;
		     gene_counter+= NUM_OF_THREADS_PER_BLOCK) {

			// Shoemake genes (u1, u2, u3) ranges between [0,1]
			if ((gene_counter >= 3) && (gene_counter <= 5)) {
				genotype_candidate[gene_counter] = gpu_randf(dockpars_prng_states);
			}
			// Other genes: translation and torsions
			else {
				genotype_candidate[gene_counter] = offspring_genotype[gene_counter] +
								   genotype_deviate[gene_counter]   +
								   genotype_bias[gene_counter];
			}
		}

		// Evaluating candidate
		barrier(CLK_LOCAL_MEM_FENCE);

		// ==================================================================
		gpu_calc_energy(dockpars_rotbondlist_length,
				dockpars_num_of_atoms,
				dockpars_gridsize_x,			// g1 = gridsize_x
				dockpars_gridsize_y,
				dockpars_gridsize_z,
				dockpars_gridsize_x_times_y,		// g2 = gridsize_x * gridsize_y
				dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
				dockpars_fgrids,
				dockpars_num_of_atypes,
				dockpars_num_of_intraE_contributors,
				dockpars_grid_spacing,
				dockpars_coeff_elec,
				dockpars_qasp,
				dockpars_coeff_desolv,
				genotype_candidate,
				&candidate_energy,
				&run_id,
				// Some OpenCL compilers don't allow declaring
				// local variables within non-kernel functions.
				// These local variables must be declared in a kernel,
				// and then passed to non-kernel functions.
				calc_coords_x,
				calc_coords_y,
				calc_coords_z,
				partial_energies,
				#if defined (DEBUG_ENERGY_KERNEL3)
				partial_interE,
				partial_intraE,
				#endif

				atom_charges_const,
				atom_types_const,
				intraE_contributors_const,
				VWpars_AC_const,
				VWpars_BD_const,
				dspars_S_const,
				dspars_V_const,
				rotlist_const,
				ref_coords_x_const,
				ref_coords_y_const,
				ref_coords_z_const,
				rotbonds_moving_vectors_const,
				rotbonds_unit_vectors_const,
				ref_orientation_quats_const
				);
		// =================================================================

		if (get_local_id(0) == 0) {
			evaluation_cnt++;

			#if defined (DEBUG_ENERGY_KERNEL3)
			printf("%-18s [%-5s]---{%-5s}   [%-10.8f]---{%-10.8f}\n", "-ENERGY-KERNEL3-", "GRIDS", "INTRA", partial_interE[0], partial_intraE[0]);
			#endif
		}

		barrier(CLK_LOCAL_MEM_FENCE);

		if (candidate_energy < offspring_energy)	// If candidate is better, success
		{
			for (gene_counter = get_local_id(0);
			     gene_counter < dockpars_num_of_genes;
			     gene_counter+= NUM_OF_THREADS_PER_BLOCK)
			{
				// Updating offspring_genotype
				offspring_genotype[gene_counter] = genotype_candidate[gene_counter];

				// Updating genotype_bias
				genotype_bias[gene_counter] = 0.6f*genotype_bias[gene_counter] + 0.4f*genotype_deviate[gene_counter];
			}

			// Work-item 0 will overwrite the shared variables
			// used in the previous if condition
			barrier(CLK_LOCAL_MEM_FENCE);

			if (get_local_id(0) == 0)
			{
				offspring_energy = candidate_energy;
				cons_succ++;
				cons_fail = 0;
			}
		}
		else	// If candidate is worse, check the opposite direction
		{
			// Generating the other genotype candidate
			for (gene_counter = get_local_id(0);
			     gene_counter < dockpars_num_of_genes;
			     gene_counter+= NUM_OF_THREADS_PER_BLOCK) {

				// Shoemake genes (u1, u2, u3) ranges between [0,1]
				if ((gene_counter >= 3) && (gene_counter <= 5)) {
					genotype_candidate[gene_counter] = gpu_randf(dockpars_prng_states);
				}
				// Other genes: translation and torsions
				else {
					genotype_candidate[gene_counter] = offspring_genotype[gene_counter] -
									   genotype_deviate[gene_counter]   -
									   genotype_bias[gene_counter];
				}
			}

			// Evaluating candidate
			barrier(CLK_LOCAL_MEM_FENCE);

			// =================================================================
			gpu_calc_energy(dockpars_rotbondlist_length,
					dockpars_num_of_atoms,
					dockpars_gridsize_x,			// g1 = gridsize_x
					dockpars_gridsize_y,
					dockpars_gridsize_z,
					dockpars_gridsize_x_times_y,		// g2 = gridsize_x * gridsize_y
					dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
					dockpars_fgrids,
					dockpars_num_of_atypes,
					dockpars_num_of_intraE_contributors,
					dockpars_grid_spacing,
					dockpars_coeff_elec,
					dockpars_qasp,
					dockpars_coeff_desolv,
					genotype_candidate,
					&candidate_energy,
					&run_id,
					// Some OpenCL compilers don't allow declaring
					// local variables within non-kernel functions.
					// These local variables must be declared in a kernel,
					// and then passed to non-kernel functions.
					calc_coords_x,
					calc_coords_y,
					calc_coords_z,
					partial_energies,
					#if defined (DEBUG_ENERGY_KERNEL3)
					partial_interE,
					partial_intraE,
					#endif

					atom_charges_const,
					atom_types_const,
					intraE_contributors_const,
					VWpars_AC_const,
					VWpars_BD_const,
					dspars_S_const,
					dspars_V_const,
					rotlist_const,
					ref_coords_x_const,
					ref_coords_y_const,
					ref_coords_z_const,
					rotbonds_moving_vectors_const,
					rotbonds_unit_vectors_const,
					ref_orientation_quats_const
					);
			// =================================================================

			if (get_local_id(0) == 0) {
				evaluation_cnt++;

				#if defined (DEBUG_ENERGY_KERNEL3)
				printf("%-18s [%-5s]---{%-5s}   [%-10.8f]---{%-10.8f}\n", "-ENERGY-KERNEL3-", "GRIDS", "INTRA", partial_interE[0], partial_intraE[0]);
				#endif
			}

			barrier(CLK_LOCAL_MEM_FENCE);

			if (candidate_energy < offspring_energy) // If candidate is better, success
			{
				for (gene_counter = get_local_id(0);
				     gene_counter < dockpars_num_of_genes;
				     gene_counter+= NUM_OF_THREADS_PER_BLOCK)
				{
					// Updating offspring_genotype
					offspring_genotype[gene_counter] = genotype_candidate[gene_counter];

					// Updating genotype_bias (the step was taken in the
					// negative direction, hence the subtracted deviate)
					genotype_bias[gene_counter] = 0.6f*genotype_bias[gene_counter] - 0.4f*genotype_deviate[gene_counter];
				}

				// Work-item 0 will overwrite the shared variables
				// used in the previous if condition
				barrier(CLK_LOCAL_MEM_FENCE);

				if (get_local_id(0) == 0)
				{
					offspring_energy = candidate_energy;
					cons_succ++;
					cons_fail = 0;
				}
			}
			else	// Failure in both directions
			{
				for (gene_counter = get_local_id(0);
				     gene_counter < dockpars_num_of_genes;
				     gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
					// Updating genotype_bias
					genotype_bias[gene_counter] = 0.5f*genotype_bias[gene_counter];
				}

				if (get_local_id(0) == 0)
				{
					cons_succ = 0;
					cons_fail++;
				}
			}
		}

		// Changing rho if needed: after dockpars_cons_limit consecutive
		// successes rho is scaled by LS_EXP_FACTOR, after that many
		// consecutive failures by LS_CONT_FACTOR (both defined elsewhere).
		if (get_local_id(0) == 0)
		{
			iteration_cnt++;

			if (cons_succ >= dockpars_cons_limit)
			{
				rho *= LS_EXP_FACTOR;
				cons_succ = 0;
			}
			else if (cons_fail >= dockpars_cons_limit)
			{
				rho *= LS_CONT_FACTOR;
				cons_fail = 0;
			}
		}

		// Makes the updated rho / iteration_cnt visible before the loop
		// condition is re-evaluated by all work-items
		barrier(CLK_LOCAL_MEM_FENCE);
	}

	// Updating eval counter and energy
	if (get_local_id(0) == 0) {
		dockpars_evals_of_new_entities[run_id*dockpars_pop_size+entity_id] += evaluation_cnt;
		dockpars_energies_next[run_id*dockpars_pop_size+entity_id] = offspring_energy;
	}

	// Mapping torsion angles
	for (gene_counter = get_local_id(0);
	     gene_counter < dockpars_num_of_genes;
	     gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
		if (gene_counter >= 6) {
			map_angle(&(offspring_genotype[gene_counter]));
		}
	}

	// Updating old offspring in population
	barrier(CLK_LOCAL_MEM_FENCE);

	event_t ev_store = async_work_group_copy(dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
			      offspring_genotype,
			      dockpars_num_of_genes, 0);

	// Every async copy must be waited on; this ensures the write-back
	// has completed before the work-group finishes.
	wait_group_events(1, &ev_store);
}