// kernel4.cl
/*

OCLADock, an OpenCL implementation of AutoDock 4.2 running a Lamarckian Genetic Algorithm
Copyright (C) 2017 TU Darmstadt, Embedded Systems and Applications Group, Germany. All rights reserved.

AutoDock is a Trade Mark of the Scripps Research Institute.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

*/


// Committed by Leonardo Solis.
/*
 * gpu_gen_and_eval_newpops
 *
 * Each work-group produces one entity of the next population.
 *
 *  - Work-groups whose group ID is a multiple of dockpars_pop_size call
 *    gpu_perform_elitist_selection().
 *  - Every other work-group builds one offspring:
 *      1. draws 10 random numbers (shared in __local memory),
 *      2. picks two parents by binary tournament among four random candidates,
 *      3. performs a crossover between the parents (or copies parent 0),
 *      4. mutates the offspring genes,
 *      5. evaluates the offspring with gpu_calc_energy(),
 *      6. stores its energy, its evaluation count (1) and its genotype into
 *         the "next" buffers at the slot given by the group ID.
 *
 * The run a work-group belongs to is get_group_id(0) / dockpars_pop_size.
 *
 * Parameters (only what this kernel itself relies on):
 *  dockpars_conformations_current / dockpars_energies_current
 *      genotypes / energies of the current population (read here).
 *  dockpars_conformations_next / dockpars_energies_next
 *      genotypes / energies of the next population (written here).
 *  dockpars_evals_of_new_entities
 *      per-entity evaluation counter, set to 1 for each new offspring.
 *  dockpars_prng_states
 *      state handed to gpu_randf().
 *  dockpars_pop_size, dockpars_num_of_genes
 *      population size per run and number of genes actually used.
 *  dockpars_tournament_rate, dockpars_crossover_rate, dockpars_mutation_rate
 *      compared against 100.0f * random number, i.e. used as percentages.
 *  dockpars_abs_max_dmov, dockpars_abs_max_dang
 *      maximal absolute change applied by a mutation to translation and
 *      torsion genes, respectively.
 *  All remaining arguments are forwarded unchanged to gpu_calc_energy().
 *
 * Note: every branch that contains a barrier or another work-group function
 * is selected by values that are identical for all work-items of the group
 * (the group ID or data held in __local memory).
 */
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
gpu_gen_and_eval_newpops(
			 char   dockpars_num_of_atoms,
			 char   dockpars_num_of_atypes,
			 int    dockpars_num_of_intraE_contributors,
			 char   dockpars_gridsize_x,
			 char   dockpars_gridsize_y,
			 char   dockpars_gridsize_z,
									// g1 = gridsize_x
			 uint   dockpars_gridsize_x_times_y,		// g2 = gridsize_x * gridsize_y
			 uint   dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
			 float  dockpars_grid_spacing,
	  __global const float* restrict dockpars_fgrids, // This is too large to be allocated in __constant
			 int    dockpars_rotbondlist_length,
			 float  dockpars_coeff_elec,
			 float  dockpars_coeff_desolv,
	  __global const float* restrict  dockpars_conformations_current,
	  __global       float* restrict  dockpars_energies_current,
	  __global       float* restrict  dockpars_conformations_next,
	  __global       float* restrict  dockpars_energies_next,
	  __global       int*   restrict  dockpars_evals_of_new_entities,
	  __global       uint*  restrict dockpars_prng_states,
			 int    dockpars_pop_size,
			 int    dockpars_num_of_genes,
			 float  dockpars_tournament_rate,
			 float  dockpars_crossover_rate,
			 float  dockpars_mutation_rate,
			 float  dockpars_abs_max_dmov,
			 float  dockpars_abs_max_dang,
			 float  dockpars_qasp,
	      __constant float* atom_charges_const,
	      __constant char*  atom_types_const,
	      __constant char*  intraE_contributors_const,
	      __constant float* VWpars_AC_const,
	      __constant float* VWpars_BD_const,
	      __constant float* dspars_S_const,
	      __constant float* dspars_V_const,
	      __constant int*   rotlist_const,
	      __constant float* ref_coords_x_const,
	      __constant float* ref_coords_y_const,
	      __constant float* ref_coords_z_const,
	      __constant float* rotbonds_moving_vectors_const,
	      __constant float* rotbonds_unit_vectors_const,
	      __constant float* ref_orientation_quats_const
)
//The GPU global function
{
	// Some OpenCL compilers don't allow declaring
	// local variables within non-kernel functions.
	// These local variables must be declared in a kernel,
	// and then passed to non-kernel functions.
	__local float offspring_genotype[ACTUAL_GENOTYPE_LENGTH];
	__local int parent_candidates[4];
	__local float candidate_energies[4];
	__local int parents[2];
	__local int run_id;
	__local int covr_point[2];
	__local float randnums[10];
	int temp_covr_point;
	int gene_counter;
	__local float energy;	// could be shared since only thread 0 will use it

	__local float best_energies[NUM_OF_THREADS_PER_BLOCK];
	__local int best_IDs[NUM_OF_THREADS_PER_BLOCK];
	__local int best_ID[1]; //__local int best_ID;

	__local float calc_coords_x[MAX_NUM_OF_ATOMS];
	__local float calc_coords_y[MAX_NUM_OF_ATOMS];
	__local float calc_coords_z[MAX_NUM_OF_ATOMS];
	__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];
	#if defined (DEBUG_ENERGY_KERNEL4)
	__local float partial_interE [NUM_OF_THREADS_PER_BLOCK];
	__local float partial_intraE [NUM_OF_THREADS_PER_BLOCK];
	#endif

	// In this case this compute-unit is responsible for elitist selection
	if ((get_group_id(0) % dockpars_pop_size) == 0) {
		gpu_perform_elitist_selection(dockpars_pop_size,
					      dockpars_energies_current,
					      dockpars_energies_next,
					      dockpars_evals_of_new_entities,
					      dockpars_num_of_genes,
					      dockpars_conformations_next,
					      dockpars_conformations_current,
					      best_energies,
					      best_IDs,
					      best_ID);
	}
	else
	{
		// Generating the following random numbers:
		// [0..3] for parent candidates,
		// [4..5] for binary tournaments, [6] for deciding crossover,
		// [7..8] for crossover points, [9] for local search
		for (gene_counter = get_local_id(0);
		     gene_counter < 10;
		     gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
			randnums[gene_counter] = gpu_randf(dockpars_prng_states);
		}

		// Determining run ID
		if (get_local_id(0) == 0) {
			run_id = get_group_id(0) / dockpars_pop_size;
		}

		// Performing binary tournament selection
		// (the barrier publishes randnums[] and run_id to the whole group)
		barrier(CLK_LOCAL_MEM_FENCE);

		if (get_local_id(0) < 4)	// it is not ensured that the four candidates will be different...
		{
			// NOTE(review): the index stays below dockpars_pop_size only if
			// gpu_randf() never returns exactly 1.0f -- confirm against gpu_randf().
			parent_candidates[get_local_id(0)]  = (int) (dockpars_pop_size*randnums[get_local_id(0)]); // using randnums[0..3]
			candidate_energies[get_local_id(0)] = dockpars_energies_current[run_id*dockpars_pop_size+parent_candidates[get_local_id(0)]];
		}

		barrier(CLK_LOCAL_MEM_FENCE);

		// Work-item 0 decides between candidates 0/1, work-item 1 between 2/3.
		// The lower-energy candidate wins when the random draw is below the
		// tournament rate, otherwise the other candidate is taken.
		if (get_local_id(0) < 2)
		{
			if (candidate_energies[2*get_local_id(0)] < candidate_energies[2*get_local_id(0)+1]) {
				if (100.0f*randnums[4+get_local_id(0)] < dockpars_tournament_rate) {	// using randnums[4..5]
					parents[get_local_id(0)] = parent_candidates[2*get_local_id(0)];
				}
				else {
					parents[get_local_id(0)] = parent_candidates[2*get_local_id(0)+1];
				}
			}
			else {
				if (100.0f*randnums[4+get_local_id(0)] < dockpars_tournament_rate) {
					parents[get_local_id(0)] = parent_candidates[2*get_local_id(0)+1];
				}
				else {
					parents[get_local_id(0)] = parent_candidates[2*get_local_id(0)];
				}
			}
		}

		// Performing crossover
		barrier(CLK_LOCAL_MEM_FENCE);

		if (100.0f*randnums[6] < dockpars_crossover_rate)	// Using randnums[6]
		{
			if (get_local_id(0) < 2) {
				// Using randnums[7..8]
				covr_point[get_local_id(0)] = (int) ((dockpars_num_of_genes-1)*randnums[7+get_local_id(0)]);
			}

			barrier(CLK_LOCAL_MEM_FENCE);

			// covr_point[0] should store the lower crossover-point
			if (get_local_id(0) == 0) {
				if (covr_point[1] < covr_point[0]) {
					temp_covr_point = covr_point[1];
					covr_point[1]   = covr_point[0];
					covr_point[0]   = temp_covr_point;
				}
			}

			barrier(CLK_LOCAL_MEM_FENCE);

			for (gene_counter = get_local_id(0);
			     gene_counter < dockpars_num_of_genes;
			     gene_counter+= NUM_OF_THREADS_PER_BLOCK)
			{
				// Two-point crossover
				if (covr_point[0] != covr_point[1])
				{
					if ((gene_counter <= covr_point[0]) || (gene_counter > covr_point[1]))
						offspring_genotype[gene_counter] = dockpars_conformations_current[(run_id*dockpars_pop_size+parents[0])*GENOTYPE_LENGTH_IN_GLOBMEM+gene_counter];
					else
						offspring_genotype[gene_counter] = dockpars_conformations_current[(run_id*dockpars_pop_size+parents[1])*GENOTYPE_LENGTH_IN_GLOBMEM+gene_counter];
				}
				// Single-point crossover
				else
				{
					if (gene_counter <= covr_point[0])
						offspring_genotype[gene_counter] = dockpars_conformations_current[(run_id*dockpars_pop_size+parents[0])*GENOTYPE_LENGTH_IN_GLOBMEM+gene_counter];
					else
						offspring_genotype[gene_counter] = dockpars_conformations_current[(run_id*dockpars_pop_size+parents[1])*GENOTYPE_LENGTH_IN_GLOBMEM+gene_counter];
				}
			}
		}
		else	// no crossover
		{
			// The offspring starts as a plain copy of parent 0.
			// An async copy is only guaranteed to be complete after
			// wait_group_events(); a barrier alone does not ensure that.
			event_t parent_copied = async_work_group_copy(offspring_genotype,
					     dockpars_conformations_current+(run_id*dockpars_pop_size+parents[0])*GENOTYPE_LENGTH_IN_GLOBMEM,
					     dockpars_num_of_genes, 0);
			wait_group_events(1, &parent_copied);
		} // End of crossover

		barrier(CLK_LOCAL_MEM_FENCE);

		// Performing mutation
		for (gene_counter = get_local_id(0);
		     gene_counter < dockpars_num_of_genes;
		     gene_counter+= NUM_OF_THREADS_PER_BLOCK)
		{
			if (100.0f*gpu_randf(dockpars_prng_states) < dockpars_mutation_rate)
			{
				// Translation genes
				if (gene_counter <= 2) {
					offspring_genotype[gene_counter] += dockpars_abs_max_dmov*(2*gpu_randf(dockpars_prng_states)-1);
				}
				// Shoemake genes (u1, u2, u3) ranges between [0,1]
				else if (gene_counter <= 5) {
					offspring_genotype[gene_counter] = gpu_randf(dockpars_prng_states);
				}
				// Torsion genes
				else {
					offspring_genotype[gene_counter] += dockpars_abs_max_dang*(2*gpu_randf(dockpars_prng_states)-1);
					map_angle(&(offspring_genotype[gene_counter]));
				}
			}
		} // End of mutation

		// Calculating energy of new offspring
		barrier(CLK_LOCAL_MEM_FENCE);

		// =============================================================
		gpu_calc_energy(dockpars_rotbondlist_length,
				dockpars_num_of_atoms,
				dockpars_gridsize_x,
				dockpars_gridsize_y,
				dockpars_gridsize_z,
									// g1 = gridsize_x
				dockpars_gridsize_x_times_y,		// g2 = gridsize_x * gridsize_y
				dockpars_gridsize_x_times_y_times_z,	// g3 = gridsize_x * gridsize_y * gridsize_z
				dockpars_fgrids,
				dockpars_num_of_atypes,
				dockpars_num_of_intraE_contributors,
				dockpars_grid_spacing,
				dockpars_coeff_elec,
				dockpars_qasp,
				dockpars_coeff_desolv,
				offspring_genotype,
				&energy,
				&run_id,
				// Some OpenCL compilers don't allow declaring
				// local variables within non-kernel functions.
				// These local variables must be declared in a kernel,
				// and then passed to non-kernel functions.
				calc_coords_x,
				calc_coords_y,
				calc_coords_z,
				partial_energies,
				#if defined (DEBUG_ENERGY_KERNEL4)
				partial_interE,
				partial_intraE,
				#endif

				atom_charges_const,
				atom_types_const,
				intraE_contributors_const,
				VWpars_AC_const,
				VWpars_BD_const,
				dspars_S_const,
				dspars_V_const,
				rotlist_const,
				ref_coords_x_const,
				ref_coords_y_const,
				ref_coords_z_const,
				rotbonds_moving_vectors_const,
				rotbonds_unit_vectors_const,
				ref_orientation_quats_const
				);
		// =============================================================

		// Work-item 0 publishes the result for this entity
		if (get_local_id(0) == 0) {
			dockpars_evals_of_new_entities[get_group_id(0)] = 1;
			dockpars_energies_next[get_group_id(0)] = energy;

			#if defined (DEBUG_ENERGY_KERNEL4)
			printf("%-18s [%-5s]---{%-5s}   [%-10.8f]---{%-10.8f}\n", "-ENERGY-KERNEL4-", "GRIDS", "INTRA", partial_interE[0], partial_intraE[0]);
			#endif
		}

		// Copying new offspring to next generation.
		// The event is waited on so the copy is complete before the
		// work-group finishes.
		event_t offspring_stored = async_work_group_copy(dockpars_conformations_next + GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0),
				      offspring_genotype,
				      dockpars_num_of_genes, 0);
		wait_group_events(1, &offspring_stored);
	}
}