// kernel3.cl
/*

OCLADock, an OpenCL implementation of AutoDock 4.2 running a Lamarckian Genetic Algorithm
Copyright (C) 2017 TU Darmstadt, Embedded Systems and Applications Group, Germany. All rights reserved.

AutoDock is a Trade Mark of the Scripps Research Institute.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

*/


// Kernel 3: local search on pre-selected entities of conformations_next.
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
26
27
28
29
30
31
32
perform_LS(		
			char   dockpars_num_of_atoms,
			char   dockpars_num_of_atypes,
			int    dockpars_num_of_intraE_contributors,
			char   dockpars_gridsize_x,
			char   dockpars_gridsize_y,
			char   dockpars_gridsize_z,
33
34
35
							    		// g1 = gridsize_x
  			uint   dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
			uint   dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
			float  dockpars_grid_spacing,
         __global const float* restrict dockpars_fgrids, // This is too large to be allocated in __constant 
	        	int    dockpars_rotbondlist_length,
			float  dockpars_coeff_elec,
			float  dockpars_coeff_desolv,
  	 __global       float* restrict dockpars_conformations_next,
  	 __global 	float* restrict dockpars_energies_next,
  	 __global 	int*   restrict dockpars_evals_of_new_entities,
  	 __global 	uint*  restrict dockpars_prng_states,
			int    dockpars_pop_size,
			int    dockpars_num_of_genes,
			float  dockpars_lsearch_rate,
			uint   dockpars_num_of_lsentities,
			float  dockpars_rho_lower_bound,
			float  dockpars_base_dmov_mul_sqrt3,
			float  dockpars_base_dang_mul_sqrt3,
			uint   dockpars_cons_limit,
			uint   dockpars_max_num_of_iters,
			float  dockpars_qasp,
lvs's avatar
lvs committed
55
56
57
58
59
60
61
			float  dockpars_smooth,

	 __constant     kernelconstant_interintra* 	kerconst_interintra,
	 __global const kernelconstant_intracontrib*  	kerconst_intracontrib,
	 __constant     kernelconstant_intra*		kerconst_intra,
	 __constant     kernelconstant_rotlist*   	kerconst_rotlist,
	 __constant     kernelconstant_conform*		kerconst_conform
Leonardo Solis's avatar
Leonardo Solis committed
62
63
64
65
66
67
68
69
70
)
//The GPU global function performs local search on the pre-defined entities of conformations_next.
//The number of blocks which should be started equals to num_of_lsentities*num_of_runs.
//This way the first num_of_lsentities entity of each population will be subjected to local search
//(and each block carries out the algorithm for one entity).
//Since the first entity is always the best one in the current population,
//it is always tested according to the ls probability, and if it not to be
//subjected to local search, the entity with ID num_of_lsentities is selected instead of the first one (with ID 0).
{
71
72
73
74
	// Some OpenCL compilers don't allow declaring 
	// local variables within non-kernel functions.
	// These local variables must be declared in a kernel, 
	// and then passed to non-kernel functions.
Leonardo Solis's avatar
Leonardo Solis committed
75
76
77
	__local float genotype_candidate[ACTUAL_GENOTYPE_LENGTH];
	__local float genotype_deviate  [ACTUAL_GENOTYPE_LENGTH];
	__local float genotype_bias     [ACTUAL_GENOTYPE_LENGTH];
Leonardo Solis's avatar
Leonardo Solis committed
78
        __local float rho;
Leonardo Solis's avatar
Leonardo Solis committed
79
80
81
82
83
84
85
86
87
88
89
90
	__local int   cons_succ;
	__local int   cons_fail;
	__local int   iteration_cnt;
	__local float candidate_energy;
	__local int   evaluation_cnt;
	int gene_counter;

	__local float offspring_genotype[ACTUAL_GENOTYPE_LENGTH];
	__local int run_id;
	__local int entity_id;
	__local float offspring_energy;

Leonardo Solis's avatar
Leonardo Solis committed
91
92
93
94
95
	__local float calc_coords_x[MAX_NUM_OF_ATOMS];
	__local float calc_coords_y[MAX_NUM_OF_ATOMS];
	__local float calc_coords_z[MAX_NUM_OF_ATOMS];
	__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];

lvs's avatar
lvs committed
96
	#if defined (DEBUG_ENERGY_KERNEL)
97
98
99
100
	__local float partial_interE [NUM_OF_THREADS_PER_BLOCK];
	__local float partial_intraE [NUM_OF_THREADS_PER_BLOCK];
	#endif

101
102
	// Determining run ID and entity ID
	// Initializing offspring genotype
Leonardo Solis's avatar
Leonardo Solis committed
103
104
105
106
107
	if (get_local_id(0) == 0)
	{
		run_id = get_group_id(0) / dockpars_num_of_lsentities;
		entity_id = get_group_id(0) % dockpars_num_of_lsentities;

108
109
110
111
112
113
114
115
116
		// Since entity 0 is the best one due to elitism,
		// it should be subjected to random selection
		if (entity_id == 0) {
			// If entity 0 is not selected according to LS-rate,
			// choosing an other entity
			if (100.0f*gpu_randf(dockpars_prng_states) > dockpars_lsearch_rate) {
				entity_id = dockpars_num_of_lsentities;					
			}
		}
Leonardo Solis's avatar
Leonardo Solis committed
117
118
119
120
121
122

		offspring_energy = dockpars_energies_next[run_id*dockpars_pop_size+entity_id];
	}

	barrier(CLK_LOCAL_MEM_FENCE);

lvs's avatar
lvs committed
123
124
125
  	event_t ev = async_work_group_copy(offspring_genotype,
			      		   dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
                              		   dockpars_num_of_genes, 0);
126
127
128
129

	for (gene_counter = get_local_id(0);
	     gene_counter < dockpars_num_of_genes;
	     gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
Leonardo Solis's avatar
Leonardo Solis committed
130
		   genotype_bias[gene_counter] = 0.0f;
131
	}
Leonardo Solis's avatar
Leonardo Solis committed
132
133
134
135
136
137
138
139
140

	if (get_local_id(0) == 0) {
		rho = 1.0f;
		cons_succ = 0;
		cons_fail = 0;
		iteration_cnt = 0;
		evaluation_cnt = 0;
	}

lvs's avatar
lvs committed
141
142

	// Asynchronous copy should be finished by here
143
	wait_group_events(1, &ev);
lvs's avatar
lvs committed
144

Leonardo Solis's avatar
Leonardo Solis committed
145
146
147
148
	barrier(CLK_LOCAL_MEM_FENCE);

	while ((iteration_cnt < dockpars_max_num_of_iters) && (rho > dockpars_rho_lower_bound))
	{
149
150
151
152
		// New random deviate
		for (gene_counter = get_local_id(0);
		     gene_counter < dockpars_num_of_genes;
		     gene_counter+= NUM_OF_THREADS_PER_BLOCK)
Leonardo Solis's avatar
Leonardo Solis committed
153
154
155
		{
			genotype_deviate[gene_counter] = rho*(2*gpu_randf(dockpars_prng_states)-1);

156
			// Translation genes
157
			if (gene_counter < 3) {
158
				genotype_deviate[gene_counter] *= dockpars_base_dmov_mul_sqrt3;
159
160
161
			}
			// Orientation and torsion genes
			else {
Leonardo Solis's avatar
Leonardo Solis committed
162
				genotype_deviate[gene_counter] *= dockpars_base_dang_mul_sqrt3;
163
			}
Leonardo Solis's avatar
Leonardo Solis committed
164
165
		}

166
167
168
169
170
171
172
		// Generating new genotype candidate
		for (gene_counter = get_local_id(0);
		     gene_counter < dockpars_num_of_genes;
		     gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
			   genotype_candidate[gene_counter] = offspring_genotype[gene_counter] + 
							      genotype_deviate[gene_counter]   + 
							      genotype_bias[gene_counter];
173
174
		}

175
		// Evaluating candidate
Leonardo Solis's avatar
Leonardo Solis committed
176
177
178
179
		barrier(CLK_LOCAL_MEM_FENCE);

		// ==================================================================
		gpu_calc_energy(dockpars_rotbondlist_length,
Leonardo Solis's avatar
Leonardo Solis committed
180
181
182
183
				dockpars_num_of_atoms,
				dockpars_gridsize_x,
				dockpars_gridsize_y,
				dockpars_gridsize_z,
184
185
186
								    	// g1 = gridsize_x
				dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
				dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
Leonardo Solis's avatar
Leonardo Solis committed
187
188
189
190
191
192
193
				dockpars_fgrids,
				dockpars_num_of_atypes,
				dockpars_num_of_intraE_contributors,
				dockpars_grid_spacing,
				dockpars_coeff_elec,
				dockpars_qasp,
				dockpars_coeff_desolv,
lvs's avatar
lvs committed
194
195
				dockpars_smooth,

Leonardo Solis's avatar
Leonardo Solis committed
196
197
198
				genotype_candidate,
				&candidate_energy,
				&run_id,
199
200
201
202
				// Some OpenCL compilers don't allow declaring 
				// local variables within non-kernel functions.
				// These local variables must be declared in a kernel, 
				// and then passed to non-kernel functions.
Leonardo Solis's avatar
Leonardo Solis committed
203
204
205
206
				calc_coords_x,
				calc_coords_y,
				calc_coords_z,
				partial_energies,
lvs's avatar
lvs committed
207
				#if defined (DEBUG_ENERGY_KERNEL)
208
209
210
				partial_interE,
				partial_intraE,
				#endif
lvs's avatar
lvs committed
211
212
213
#if 0
				false,
#endif
lvs's avatar
lvs committed
214
215
216
217
218
			   	kerconst_interintra,
			   	kerconst_intracontrib,
			   	kerconst_intra,
			   	kerconst_rotlist,
			   	kerconst_conform
219
				);
Leonardo Solis's avatar
Leonardo Solis committed
220
221
		// =================================================================

222
		if (get_local_id(0) == 0) {
Leonardo Solis's avatar
Leonardo Solis committed
223
			evaluation_cnt++;
224

lvs's avatar
lvs committed
225
			#if defined (DEBUG_ENERGY_KERNEL)
226
227
			printf("%-18s [%-5s]---{%-5s}   [%-10.8f]---{%-10.8f}\n", "-ENERGY-KERNEL3-", "GRIDS", "INTRA", partial_interE[0], partial_intraE[0]);
			#endif
228
		}
Leonardo Solis's avatar
Leonardo Solis committed
229
230
231

		barrier(CLK_LOCAL_MEM_FENCE);

232
		if (candidate_energy < offspring_energy)	// If candidate is better, success
Leonardo Solis's avatar
Leonardo Solis committed
233
		{
234
235
236
			for (gene_counter = get_local_id(0);
			     gene_counter < dockpars_num_of_genes;
			     gene_counter+= NUM_OF_THREADS_PER_BLOCK)
Leonardo Solis's avatar
Leonardo Solis committed
237
			{
238
				// Updating offspring_genotype
Leonardo Solis's avatar
Leonardo Solis committed
239
240
				offspring_genotype[gene_counter] = genotype_candidate[gene_counter];

241
				// Updating genotype_bias
Leonardo Solis's avatar
Leonardo Solis committed
242
243
244
				genotype_bias[gene_counter] = 0.6f*genotype_bias[gene_counter] + 0.4f*genotype_deviate[gene_counter];
			}

245
246
			// Work-item 0 will overwrite the shared variables
			// used in the previous if condition
Leonardo Solis's avatar
Leonardo Solis committed
247
248
249
250
251
252
253
254
255
			barrier(CLK_LOCAL_MEM_FENCE);

			if (get_local_id(0) == 0)
			{
				offspring_energy = candidate_energy;
				cons_succ++;
				cons_fail = 0;
			}
		}
256
		else	// If candidate is worser, check the opposite direction
Leonardo Solis's avatar
Leonardo Solis committed
257
		{
258
259
260
261
262
263
264
			// Generating the other genotype candidate
			for (gene_counter = get_local_id(0);
			     gene_counter < dockpars_num_of_genes;
			     gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
				   genotype_candidate[gene_counter] = offspring_genotype[gene_counter] - 
								      genotype_deviate[gene_counter] - 
								      genotype_bias[gene_counter];
265
266
			}

267
			// Evaluating candidate
Leonardo Solis's avatar
Leonardo Solis committed
268
269
270
271
			barrier(CLK_LOCAL_MEM_FENCE);

			// =================================================================
			gpu_calc_energy(dockpars_rotbondlist_length,
Leonardo Solis's avatar
Leonardo Solis committed
272
273
274
275
					dockpars_num_of_atoms,
					dockpars_gridsize_x,
					dockpars_gridsize_y,
					dockpars_gridsize_z,
276
277
278
									    	// g1 = gridsize_x
					dockpars_gridsize_x_times_y, 		// g2 = gridsize_x * gridsize_y
					dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
Leonardo Solis's avatar
Leonardo Solis committed
279
280
281
282
283
284
285
					dockpars_fgrids,
					dockpars_num_of_atypes,
					dockpars_num_of_intraE_contributors,
					dockpars_grid_spacing,
					dockpars_coeff_elec,
				        dockpars_qasp,
					dockpars_coeff_desolv,
lvs's avatar
lvs committed
286
287
					dockpars_smooth,

Leonardo Solis's avatar
Leonardo Solis committed
288
289
290
					genotype_candidate,
					&candidate_energy,
					&run_id,
291
292
293
294
					// Some OpenCL compilers don't allow declaring 
					// local variables within non-kernel functions.
					// These local variables must be declared in a kernel, 
					// and then passed to non-kernel functions.
Leonardo Solis's avatar
Leonardo Solis committed
295
296
297
298
					calc_coords_x,
					calc_coords_y,
					calc_coords_z,
					partial_energies,
lvs's avatar
lvs committed
299
					#if defined (DEBUG_ENERGY_KERNEL)
300
301
302
					partial_interE,
					partial_intraE,
					#endif
lvs's avatar
lvs committed
303
304
305
#if 0
					false,
#endif
lvs's avatar
lvs committed
306
307
308
309
310
				   	kerconst_interintra,
				   	kerconst_intracontrib,
				   	kerconst_intra,
				   	kerconst_rotlist,
				   	kerconst_conform
311
					);
Leonardo Solis's avatar
Leonardo Solis committed
312
313
			// =================================================================

314
			if (get_local_id(0) == 0) {
Leonardo Solis's avatar
Leonardo Solis committed
315
				evaluation_cnt++;
316

lvs's avatar
lvs committed
317
				#if defined (DEBUG_ENERGY_KERNEL)
318
319
				printf("%-18s [%-5s]---{%-5s}   [%-10.8f]---{%-10.8f}\n", "-ENERGY-KERNEL3-", "GRIDS", "INTRA", partial_interE[0], partial_intraE[0]);
				#endif
320
			}
Leonardo Solis's avatar
Leonardo Solis committed
321
322
323

			barrier(CLK_LOCAL_MEM_FENCE);

324
			if (candidate_energy < offspring_energy) // If candidate is better, success
Leonardo Solis's avatar
Leonardo Solis committed
325
			{
326
327
328
				for (gene_counter = get_local_id(0);
				     gene_counter < dockpars_num_of_genes;
			       	     gene_counter+= NUM_OF_THREADS_PER_BLOCK)
Leonardo Solis's avatar
Leonardo Solis committed
329
				{
330
					// Updating offspring_genotype
Leonardo Solis's avatar
Leonardo Solis committed
331
332
					offspring_genotype[gene_counter] = genotype_candidate[gene_counter];

333
					// Updating genotype_bias
Leonardo Solis's avatar
Leonardo Solis committed
334
335
336
					genotype_bias[gene_counter] = 0.6f*genotype_bias[gene_counter] - 0.4f*genotype_deviate[gene_counter];
				}

337
338
				// Work-item 0 will overwrite the shared variables
				// used in the previous if condition
Leonardo Solis's avatar
Leonardo Solis committed
339
340
341
342
343
344
345
346
347
				barrier(CLK_LOCAL_MEM_FENCE);

				if (get_local_id(0) == 0)
				{
					offspring_energy = candidate_energy;
					cons_succ++;
					cons_fail = 0;
				}
			}
348
			else	// Failure in both directions
Leonardo Solis's avatar
Leonardo Solis committed
349
			{
350
351
352
353
				for (gene_counter = get_local_id(0);
				     gene_counter < dockpars_num_of_genes;
				     gene_counter+= NUM_OF_THREADS_PER_BLOCK)
					   // Updating genotype_bias
Leonardo Solis's avatar
Leonardo Solis committed
354
355
356
357
358
359
360
361
362
363
					   genotype_bias[gene_counter] = 0.5f*genotype_bias[gene_counter];

				if (get_local_id(0) == 0)
				{
					cons_succ = 0;
					cons_fail++;
				}
			}
		}

364
		// Changing rho if needed
Leonardo Solis's avatar
Leonardo Solis committed
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
		if (get_local_id(0) == 0)
		{
			iteration_cnt++;

			if (cons_succ >= dockpars_cons_limit)
			{
				rho *= LS_EXP_FACTOR;
				cons_succ = 0;
			}
			else
				if (cons_fail >= dockpars_cons_limit)
				{
					rho *= LS_CONT_FACTOR;
					cons_fail = 0;
				}
		}
		barrier(CLK_LOCAL_MEM_FENCE);
	}

384
385
	// Updating eval counter and energy
	if (get_local_id(0) == 0) {
Leonardo Solis's avatar
Leonardo Solis committed
386
387
388
389
		dockpars_evals_of_new_entities[run_id*dockpars_pop_size+entity_id] += evaluation_cnt;
		dockpars_energies_next[run_id*dockpars_pop_size+entity_id] = offspring_energy;
	}

390
391
392
393
	// Mapping torsion angles
	for (gene_counter = get_local_id(0);
	     gene_counter < dockpars_num_of_genes;
	     gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
394
		   if (gene_counter >= 3) {
Leonardo Solis's avatar
Leonardo Solis committed
395
			    map_angle(&(offspring_genotype[gene_counter]));
396
397
		   }
	}
Leonardo Solis's avatar
Leonardo Solis committed
398

399
	// Updating old offspring in population
Leonardo Solis's avatar
Leonardo Solis committed
400
401
	barrier(CLK_LOCAL_MEM_FENCE);

402
403
404
405
406
407
  	event_t ev2 = async_work_group_copy(dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
        	                            offspring_genotype,
        	                            dockpars_num_of_genes,0);

	// Asynchronous copy should be finished by here
	wait_group_events(1, &ev2);
Leonardo Solis's avatar
Leonardo Solis committed
408
}