Commit 8d518667 authored by Leonardo Solis's avatar Leonardo Solis

move local vars to kernels

parent ec0e16c3
......@@ -105,6 +105,15 @@ void gpu_perform_elitist_selection(int dockpars_pop_size,
__global float* dockpars_conformations_next,
__global const float* dockpars_conformations_current
#endif
,
// Some OpenCL compilers don't allow __local variables to be declared outside kernels,
// so these local buffers are passed in by the calling kernel
__local float* best_energies,
__local int* best_IDs,
__local int* best_ID
)
//The GPU device function performs elitist selection,
//that is, it looks for the best entity in conformations_current and
......@@ -115,11 +124,13 @@ void gpu_perform_elitist_selection(int dockpars_pop_size,
int entity_counter;
int gene_counter;
__local float best_energies[NUM_OF_THREADS_PER_BLOCK];
__local int best_IDs[NUM_OF_THREADS_PER_BLOCK];
float best_energy;
__local int best_ID;
// Some OpenCL compilers don't allow __local variables to be declared outside kernels,
// so the declarations below are disabled and the buffers are received as parameters instead
//__local float best_energies[NUM_OF_THREADS_PER_BLOCK];
//__local int best_IDs[NUM_OF_THREADS_PER_BLOCK];
//__local int best_ID;
if (get_local_id(0) < dockpars_pop_size)
{
......@@ -133,8 +144,8 @@ void gpu_perform_elitist_selection(int dockpars_pop_size,
if (dockpars_energies_current[get_group_id(0)+entity_counter] < best_energies[get_local_id(0)])
{
best_energies[get_local_id(0)] = dockpars_energies_current[get_group_id(0)+entity_counter];
best_IDs[get_local_id(0)] = entity_counter;
best_energies[get_local_id(0)] = dockpars_energies_current[get_group_id(0)+entity_counter];
best_IDs[get_local_id(0)] = entity_counter;
}
barrier(CLK_LOCAL_MEM_FENCE);
......@@ -144,7 +155,8 @@ void gpu_perform_elitist_selection(int dockpars_pop_size,
if (get_local_id(0) == 0)
{
best_energy = best_energies[0];
best_ID = best_IDs[0];
//best_ID = best_IDs[0];
best_ID[0] = best_IDs[0];
for (entity_counter=1;
entity_counter<NUM_OF_THREADS_PER_BLOCK;
......@@ -153,7 +165,8 @@ void gpu_perform_elitist_selection(int dockpars_pop_size,
if ((best_energies[entity_counter] < best_energy) && (entity_counter < dockpars_pop_size))
{
best_energy = best_energies[entity_counter];
best_ID = best_IDs[entity_counter];
//best_ID = best_IDs[entity_counter];
best_ID[0] = best_IDs[entity_counter];
}
//setting energy value of new entity
......@@ -170,5 +183,6 @@ void gpu_perform_elitist_selection(int dockpars_pop_size,
for (gene_counter=get_local_id(0);
gene_counter<dockpars_num_of_genes;
gene_counter+=NUM_OF_THREADS_PER_BLOCK)
dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+gene_counter] = dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+GENOTYPE_LENGTH_IN_GLOBMEM*best_ID+gene_counter];
//dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+gene_counter] = dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+GENOTYPE_LENGTH_IN_GLOBMEM*best_ID+gene_counter];
dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+gene_counter] = dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+GENOTYPE_LENGTH_IN_GLOBMEM*best_ID[0]+gene_counter];
}
......@@ -3,27 +3,34 @@
// All related pragmas are in defines.h (accessible by host and device code)
void gpu_calc_energy( int dockpars_rotbondlist_length,
char dockpars_num_of_atoms,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
char dockpars_num_of_atoms,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
#if defined (RESTRICT_ARGS)
__global const float* restrict dockpars_fgrids, // cannot be allocated in __constant (too large)
__global const float* restrict dockpars_fgrids, // cannot be allocated in __constant (too large)
#else
__global const float* dockpars_fgrids, // cannot be allocated in __constant (too large)
__global const float* dockpars_fgrids, // cannot be allocated in __constant (too large)
#endif
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
float dockpars_grid_spacing,
float dockpars_coeff_elec,
float dockpars_qasp,
float dockpars_coeff_desolv,
__local float* genotype,
__local float* energy,
__local int* run_id,
__constant float* atom_charges_const,
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
float dockpars_grid_spacing,
float dockpars_coeff_elec,
float dockpars_qasp,
float dockpars_coeff_desolv,
__local float* genotype,
__local float* energy,
__local int* run_id,
// Some OpenCL compilers don't allow __local variables to be declared outside kernels,
// so these local buffers are passed in by the calling kernel
__local float* calc_coords_x,
__local float* calc_coords_y,
__local float* calc_coords_z,
__local float* partial_energies,
__constant float* atom_charges_const,
__constant char* atom_types_const,
__constant char* intraE_contributors_const,
__constant float* VWpars_AC_const,
......@@ -65,10 +72,12 @@ void gpu_calc_energy( int dockpars_rotbondlist_length,
float quatrot_left_x, quatrot_left_y, quatrot_left_z, quatrot_left_q;
float quatrot_temp_x, quatrot_temp_y, quatrot_temp_z, quatrot_temp_q;
__local float calc_coords_x[MAX_NUM_OF_ATOMS];
__local float calc_coords_y[MAX_NUM_OF_ATOMS];
__local float calc_coords_z[MAX_NUM_OF_ATOMS];
__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];
// Some OpenCL compilers don't allow __local variables to be declared outside kernels,
// so the declarations below are disabled and the buffers are received as parameters instead
//__local float calc_coords_x[MAX_NUM_OF_ATOMS];
//__local float calc_coords_y[MAX_NUM_OF_ATOMS];
//__local float calc_coords_z[MAX_NUM_OF_ATOMS];
//__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];
partial_energies[get_local_id(0)] = 0.0f;
......
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
gpu_calc_initpop( char dockpars_num_of_atoms,
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
float dockpars_grid_spacing,
gpu_calc_initpop( char dockpars_num_of_atoms,
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
float dockpars_grid_spacing,
#if defined (RESTRICT_ARGS)
__global const float* restrict dockpars_fgrids, // cannot be allocated in __constant (too large)
#else
__global const float* dockpars_fgrids, // cannot be allocated in __constant (too large)
#endif
int dockpars_rotbondlist_length,
float dockpars_coeff_elec,
float dockpars_coeff_desolv,
int dockpars_rotbondlist_length,
float dockpars_coeff_elec,
float dockpars_coeff_desolv,
#if defined (RESTRICT_ARGS)
__global const float* restrict dockpars_conformations_current,
__global float* restrict dockpars_energies_current,
__global const float* restrict dockpars_conformations_current,
__global float* restrict dockpars_energies_current,
__global int* restrict dockpars_evals_of_new_entities,
#else
__global const float* dockpars_conformations_current,
__global float* dockpars_energies_current,
__global const float* dockpars_conformations_current,
__global float* dockpars_energies_current,
__global int* dockpars_evals_of_new_entities,
#endif
int dockpars_pop_size,
float dockpars_qasp,
int dockpars_pop_size,
float dockpars_qasp,
__constant float* atom_charges_const,
__constant float* atom_charges_const,
__constant char* atom_types_const,
__constant char* intraE_contributors_const,
__constant char* intraE_contributors_const,
__constant float* VWpars_AC_const,
__constant float* VWpars_BD_const,
__constant float* dspars_S_const,
......@@ -43,57 +43,69 @@ gpu_calc_initpop( char dockpars_num_of_atoms,
__constant float* rotbonds_unit_vectors_const,
__constant float* ref_orientation_quats_const
){
__local float genotype[GENOTYPE_LENGTH_IN_GLOBMEM];
__local float energy;
__local int run_id;
__local int run_id;
// Some OpenCL compilers don't allow __local variables to be declared outside kernels,
// so these local buffers are declared here and passed to gpu_calc_energy
__local float calc_coords_x[MAX_NUM_OF_ATOMS];
__local float calc_coords_y[MAX_NUM_OF_ATOMS];
__local float calc_coords_z[MAX_NUM_OF_ATOMS];
__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];
event_t ev = async_work_group_copy(genotype,
dockpars_conformations_current + GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0),
GENOTYPE_LENGTH_IN_GLOBMEM, 0);
dockpars_conformations_current + GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0),
GENOTYPE_LENGTH_IN_GLOBMEM, 0);
wait_group_events(1,&ev);
//determining run ID
if (get_local_id(0) == 0)
//determining run ID
if (get_local_id(0) == 0)
run_id = get_group_id(0) / dockpars_pop_size;
// =============================================================
// WARNING: only energy of work-item=0 will be valid
// WARNING: only energy of work-item=0 will be valid
gpu_calc_energy(dockpars_rotbondlist_length,
dockpars_num_of_atoms,
dockpars_gridsize_x,
dockpars_gridsize_y,
dockpars_gridsize_z,
dockpars_fgrids,
dockpars_num_of_atypes,
dockpars_num_of_intraE_contributors,
dockpars_grid_spacing,
dockpars_coeff_elec,
dockpars_qasp,
dockpars_coeff_desolv,
genotype,
&energy,
&run_id,
dockpars_num_of_atoms,
dockpars_gridsize_x,
dockpars_gridsize_y,
dockpars_gridsize_z,
dockpars_fgrids,
dockpars_num_of_atypes,
dockpars_num_of_intraE_contributors,
dockpars_grid_spacing,
dockpars_coeff_elec,
dockpars_qasp,
dockpars_coeff_desolv,
genotype,
&energy,
&run_id,
// Some OpenCL compilers don't allow __local variables to be declared outside kernels,
// so these local buffers are declared in this kernel and passed in here
calc_coords_x,
calc_coords_y,
calc_coords_z,
partial_energies,
atom_charges_const,
atom_types_const,
intraE_contributors_const,
VWpars_AC_const,
VWpars_BD_const,
dspars_S_const,
dspars_V_const,
rotlist_const,
ref_coords_x_const,
ref_coords_y_const,
ref_coords_z_const,
rotbonds_moving_vectors_const,
rotbonds_unit_vectors_const,
ref_orientation_quats_const);
atom_charges_const,
atom_types_const,
intraE_contributors_const,
VWpars_AC_const,
VWpars_BD_const,
dspars_S_const,
dspars_V_const,
rotlist_const,
ref_coords_x_const,
ref_coords_y_const,
ref_coords_z_const,
rotbonds_moving_vectors_const,
rotbonds_unit_vectors_const,
ref_orientation_quats_const);
// =============================================================
if (get_local_id(0) == 0) {
dockpars_energies_current[get_group_id(0)] = energy;
dockpars_evals_of_new_entities[get_group_id(0)] = 1;
}
if (get_local_id(0) == 0) {
dockpars_energies_current[get_group_id(0)] = energy;
dockpars_evals_of_new_entities[get_group_id(0)] = 1;
}
}
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
gpu_sum_evals(/*unsigned long pop_size,*/
unsigned int pop_size,
/*unsigned long num_of_runs,*/
/*unsigned long num_of_runs,*/
#if defined (RESTRICT_ARGS)
__global int* restrict dockpars_evals_of_new_entities,
__global int* restrict evals_of_runs
__global int* restrict evals_of_runs
#else
__global int* dockpars_evals_of_new_entities,
__global int* evals_of_runs
__global int* evals_of_runs
#endif
)
//The GPU global function sums the evaluation counter states
......
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
perform_LS( char dockpars_num_of_atoms,
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
float dockpars_grid_spacing,
perform_LS( char dockpars_num_of_atoms,
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
float dockpars_grid_spacing,
#if defined (RESTRICT_ARGS)
__global const float* restrict dockpars_fgrids, // cannot be allocated in __constant (too large)
......@@ -13,36 +13,36 @@ perform_LS( char dockpars_num_of_atoms,
__global const float* dockpars_fgrids, // cannot be allocated in __constant (too large)
#endif
int dockpars_rotbondlist_length,
float dockpars_coeff_elec,
float dockpars_coeff_desolv,
int dockpars_rotbondlist_length,
float dockpars_coeff_elec,
float dockpars_coeff_desolv,
#if defined (RESTRICT_ARGS)
__global float* restrict dockpars_conformations_next,
__global float* restrict dockpars_energies_next,
__global int* restrict dockpars_evals_of_new_entities,
__global unsigned int* restrict dockpars_prng_states,
__global float* restrict dockpars_conformations_next,
__global float* restrict dockpars_energies_next,
__global int* restrict dockpars_evals_of_new_entities,
__global unsigned int* restrict dockpars_prng_states,
#else
__global float* dockpars_conformations_next,
__global float* dockpars_energies_next,
__global int* dockpars_evals_of_new_entities,
__global unsigned int* dockpars_prng_states,
__global float* dockpars_conformations_next,
__global float* dockpars_energies_next,
__global int* dockpars_evals_of_new_entities,
__global unsigned int* dockpars_prng_states,
#endif
int dockpars_pop_size,
int dockpars_num_of_genes,
float dockpars_lsearch_rate,
unsigned int dockpars_num_of_lsentities,
float dockpars_rho_lower_bound,
float dockpars_base_dmov_mul_sqrt3,
float dockpars_base_dang_mul_sqrt3,
unsigned int dockpars_cons_limit,
unsigned int dockpars_max_num_of_iters,
float dockpars_qasp,
__constant float* atom_charges_const,
int dockpars_pop_size,
int dockpars_num_of_genes,
float dockpars_lsearch_rate,
unsigned int dockpars_num_of_lsentities,
float dockpars_rho_lower_bound,
float dockpars_base_dmov_mul_sqrt3,
float dockpars_base_dang_mul_sqrt3,
unsigned int dockpars_cons_limit,
unsigned int dockpars_max_num_of_iters,
float dockpars_qasp,
__constant float* atom_charges_const,
__constant char* atom_types_const,
__constant char* intraE_contributors_const,
__constant char* intraE_contributors_const,
__constant float* VWpars_AC_const,
__constant float* VWpars_BD_const,
__constant float* dspars_S_const,
......@@ -66,7 +66,7 @@ perform_LS( char dockpars_num_of_atoms,
__local float genotype_candidate[ACTUAL_GENOTYPE_LENGTH];
__local float genotype_deviate [ACTUAL_GENOTYPE_LENGTH];
__local float genotype_bias [ACTUAL_GENOTYPE_LENGTH];
__local float rho;
__local float rho;
__local int cons_succ;
__local int cons_fail;
__local int iteration_cnt;
......@@ -79,6 +79,13 @@ perform_LS( char dockpars_num_of_atoms,
__local int entity_id;
__local float offspring_energy;
// Some OpenCL compilers don't allow __local variables to be declared outside kernels,
// so these local buffers are declared here and passed to gpu_calc_energy
__local float calc_coords_x[MAX_NUM_OF_ATOMS];
__local float calc_coords_y[MAX_NUM_OF_ATOMS];
__local float calc_coords_z[MAX_NUM_OF_ATOMS];
__local float partial_energies[NUM_OF_THREADS_PER_BLOCK];
//determining run ID and entity ID, initializing
if (get_local_id(0) == 0)
{
......@@ -89,7 +96,7 @@ perform_LS( char dockpars_num_of_atoms,
if (entity_id == 0)
if (100.0f*gpu_randf(dockpars_prng_states) > dockpars_lsearch_rate)
entity_id = dockpars_num_of_lsentities; //if entity 0 is not selected according to LS rate,
//choosing another entity
//choosing another entity
offspring_energy = dockpars_energies_next[run_id*dockpars_pop_size+entity_id];
}
......@@ -98,7 +105,7 @@ perform_LS( char dockpars_num_of_atoms,
#if defined (ASYNC_COPY)
async_work_group_copy(offspring_genotype,
dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
dockpars_conformations_next+(run_id*dockpars_pop_size+entity_id)*GENOTYPE_LENGTH_IN_GLOBMEM,
dockpars_num_of_genes,0);
#else
for (gene_counter=get_local_id(0);
......@@ -148,35 +155,41 @@ perform_LS( char dockpars_num_of_atoms,
// ==================================================================
gpu_calc_energy(dockpars_rotbondlist_length,
dockpars_num_of_atoms,
dockpars_gridsize_x,
dockpars_gridsize_y,
dockpars_gridsize_z,
dockpars_fgrids,
dockpars_num_of_atypes,
dockpars_num_of_intraE_contributors,
dockpars_grid_spacing,
dockpars_coeff_elec,
dockpars_qasp,
dockpars_coeff_desolv,
genotype_candidate,
&candidate_energy,
&run_id,
atom_charges_const,
atom_types_const,
intraE_contributors_const,
VWpars_AC_const,
VWpars_BD_const,
dspars_S_const,
dspars_V_const,
rotlist_const,
ref_coords_x_const,
ref_coords_y_const,
ref_coords_z_const,
rotbonds_moving_vectors_const,
rotbonds_unit_vectors_const,
ref_orientation_quats_const);
dockpars_num_of_atoms,
dockpars_gridsize_x,
dockpars_gridsize_y,
dockpars_gridsize_z,
dockpars_fgrids,
dockpars_num_of_atypes,
dockpars_num_of_intraE_contributors,
dockpars_grid_spacing,
dockpars_coeff_elec,
dockpars_qasp,
dockpars_coeff_desolv,
genotype_candidate,
&candidate_energy,
&run_id,
// Some OpenCL compilers don't allow __local variables to be declared outside kernels,
// so these local buffers are declared in this kernel and passed in here
calc_coords_x,
calc_coords_y,
calc_coords_z,
partial_energies,
atom_charges_const,
atom_types_const,
intraE_contributors_const,
VWpars_AC_const,
VWpars_BD_const,
dspars_S_const,
dspars_V_const,
rotlist_const,
ref_coords_x_const,
ref_coords_y_const,
ref_coords_z_const,
rotbonds_moving_vectors_const,
rotbonds_unit_vectors_const,
ref_orientation_quats_const);
// =================================================================
if (get_local_id(0) == 0)
......@@ -222,35 +235,41 @@ perform_LS( char dockpars_num_of_atoms,
// =================================================================
gpu_calc_energy(dockpars_rotbondlist_length,
dockpars_num_of_atoms,
dockpars_gridsize_x,
dockpars_gridsize_y,
dockpars_gridsize_z,
dockpars_fgrids,
dockpars_num_of_atypes,
dockpars_num_of_intraE_contributors,
dockpars_grid_spacing,
dockpars_coeff_elec,
dockpars_qasp,
dockpars_coeff_desolv,
genotype_candidate,
&candidate_energy,
&run_id,
atom_charges_const,
atom_types_const,
intraE_contributors_const,
VWpars_AC_const,
VWpars_BD_const,
dspars_S_const,
dspars_V_const,
rotlist_const,
ref_coords_x_const,
ref_coords_y_const,
ref_coords_z_const,
rotbonds_moving_vectors_const,
rotbonds_unit_vectors_const,
ref_orientation_quats_const);
dockpars_num_of_atoms,
dockpars_gridsize_x,
dockpars_gridsize_y,
dockpars_gridsize_z,
dockpars_fgrids,
dockpars_num_of_atypes,
dockpars_num_of_intraE_contributors,
dockpars_grid_spacing,
dockpars_coeff_elec,
dockpars_qasp,
dockpars_coeff_desolv,
genotype_candidate,
&candidate_energy,
&run_id,
// Some OpenCL compilers don't allow __local variables to be declared outside kernels,
// so these local buffers are declared in this kernel and passed in here
calc_coords_x,
calc_coords_y,
calc_coords_z,
partial_energies,
atom_charges_const,
atom_types_const,
intraE_contributors_const,
VWpars_AC_const,
VWpars_BD_const,
dspars_S_const,
dspars_V_const,
rotlist_const,
ref_coords_x_const,
ref_coords_y_const,
ref_coords_z_const,
rotbonds_moving_vectors_const,
rotbonds_unit_vectors_const,
ref_orientation_quats_const);
// =================================================================
if (get_local_id(0) == 0)
......
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
gpu_gen_and_eval_newpops(char dockpars_num_of_atoms,
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
float dockpars_grid_spacing,
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
float dockpars_grid_spacing,
#if defined (RESTRICT_ARGS)
__global const float* restrict dockpars_fgrids, // cannot be allocated in __constant (too large)
#else
__global const float* dockpars_fgrids, // cannot be allocated in __constant (too large)
__global const float* dockpars_fgrids, // cannot be allocated in __constant (too large)
#endif
int dockpars_rotbondlist_length,
float dockpars_coeff_elec,
float dockpars_coeff_desolv,
int dockpars_rotbondlist_length,
float dockpars_coeff_elec,
float dockpars_coeff_desolv,
#if defined (RESTRICT_ARGS)
__global const float* restrict dockpars_conformations_current,
__global float* restrict dockpars_energies_current,
__global float* restrict dockpars_conformations_next,
__global float* restrict dockpars_energies_next,
__global int* restrict dockpars_evals_of_new_entities,
__global unsigned int* restrict dockpars_prng_states,
__global const float* restrict dockpars_conformations_current,
__global float* restrict dockpars_energies_current,
__global float* restrict dockpars_conformations_next,
__global float* restrict dockpars_energies_next,
__global int* restrict dockpars_evals_of_new_entities,
__global unsigned int* restrict dockpars_prng_states,
#else
__global const float* dockpars_conformations_current,
__global float* dockpars_energies_current,
__global float* dockpars_conformations_next,
__global float* dockpars_energies_next,
__global int* dockpars_evals_of_new_entities,
__global unsigned int* dockpars_prng_states,
__global const float* dockpars_conformations_current,
__global float* dockpars_energies_current,
__global float* dockpars_conformations_next,
__global float* dockpars_energies_next,
__global int* dockpars_evals_of_new_entities,
__global unsigned int* dockpars_prng_states,
#endif
int dockpars_pop_size,
int dockpars_num_of_genes,
float dockpars_tournament_rate,
float dockpars_crossover_rate,
float dockpars_mutation_rate,
float dockpars_abs_max_dmov,
float dockpars_abs_max_dang,
float dockpars_qasp,
int dockpars_pop_size,
int dockpars_num_of_genes,
float dockpars_tournament_rate,
float dockpars_crossover_rate,
float dockpars_mutation_rate,
float dockpars_abs_max_dmov,
float dockpars_abs_max_dang,
float dockpars_qasp,