Commit d266192b authored by Leonardo Solis
Browse files

cleaned up and minor corrections

parent cd23a9ad
......@@ -199,7 +199,7 @@ odock: check-env-all stringify $(SRC)
# Example
PDB := 3ce3
NRUN := 100
POPSIZE := 150
POPSIZE := 500
TESTNAME:= test
test: odock
......
......@@ -39,26 +39,24 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define NUM_OF_THREADS_PER_BLOCK 64
#endif
#define MAX_NUM_OF_ATOMS 256
#define MAX_NUM_OF_ATYPES 14
#define MAX_NUM_OF_ROTBONDS 32
#define MAX_INTRAE_CONTRIBUTORS MAX_NUM_OF_ATOMS * MAX_NUM_OF_ATOMS
#define MAX_NUM_OF_ROTATIONS MAX_NUM_OF_ATOMS * MAX_NUM_OF_ROTBONDS
#define MAX_POPSIZE 2048
#define MAX_NUM_OF_RUNS 1000
#define MAX_NUM_OF_ATOMS 256
#define MAX_NUM_OF_ATYPES 14
#define MAX_NUM_OF_ROTBONDS 32
#define MAX_INTRAE_CONTRIBUTORS (MAX_NUM_OF_ATOMS * MAX_NUM_OF_ATOMS)
#define MAX_NUM_OF_ROTATIONS (MAX_NUM_OF_ATOMS * MAX_NUM_OF_ROTBONDS)
#define MAX_POPSIZE 2048
#define MAX_NUM_OF_RUNS 1000
// Must be bigger than MAX_NUM_OF_ROTBONDS+6
#define GENOTYPE_LENGTH_IN_GLOBMEM 64
#define ACTUAL_GENOTYPE_LENGTH (MAX_NUM_OF_ROTBONDS+6)
#define LS_EXP_FACTOR 2.0f
#define LS_CONT_FACTOR 0.5f
#define LS_EXP_FACTOR 2.0f
#define LS_CONT_FACTOR 0.5f
// Improvements over Pechan's implementation
#define NATIVE_PRECISION
#define ASYNC_COPY
#define IMPROVE_GRID
#define RESTRICT_ARGS
#define MAPPED_COPY
#endif /* DEFINES_H_ */
......@@ -25,12 +25,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
// -------------------------------------------------------
//
// -------------------------------------------------------
unsigned int gpu_rand(
#if defined (RESTRICT_ARGS)
__global unsigned int* restrict prng_states
#else
__global unsigned int* prng_states
#endif
uint gpu_rand(
__global uint* restrict prng_states
)
//The GPU device function generates a random int
//with a linear congruential generator.
......@@ -39,20 +35,20 @@ unsigned int gpu_rand(
//prng_states (thread with ID tx in block with ID bx stores its state in prng_states[bx*NUM_OF_THREADS_PER_BLOCK+$
//The random number generator uses the gcc linear congruential generator constants.
{
unsigned int state;
uint state;
#if defined (REPRO)
state = 1;
#else
//current state of the threads own PRNG
//state = prng_states[get_group_id(0)*NUM_OF_THREADS_PER_BLOCK + get_local_id(0)];
// Current state of the thread's own PRNG
// state = prng_states[get_group_id(0)*NUM_OF_THREADS_PER_BLOCK + get_local_id(0)];
state = prng_states[get_global_id(0)];
//calculating next state
state = (RAND_A*state+RAND_C);
// Calculating next state
state = (RAND_A*state+RAND_C);
#endif
//saving next state to memory
//prng_states[get_group_id(0)*NUM_OF_THREADS_PER_BLOCK + get_local_id(0)] = state;
// Saving next state to memory
// prng_states[get_group_id(0)*NUM_OF_THREADS_PER_BLOCK + get_local_id(0)] = state;
prng_states[get_global_id(0)] = state;
return state;
......@@ -62,19 +58,15 @@ unsigned int gpu_rand(
//
// -------------------------------------------------------
float gpu_randf(
#if defined (RESTRICT_ARGS)
__global unsigned int* restrict prng_states
#else
__global unsigned int* prng_states
#endif
__global uint* restrict prng_states
)
//The GPU device function generates a
//random float greater than (or equal to) 0 and less than 1.
//It uses gpu_rand() function.
{
float state;
float state;
//state will be between 0 and 1
// State will be between 0 and 1
#if defined (REPRO)
state = 0.55f; //0.55f;
#else
......@@ -94,50 +86,33 @@ float gpu_randf(
//
// -------------------------------------------------------
void map_angle(__local float* angle)
//The GPU device function maps
//the input parameter to the interval 0...360
//(supposing that it is an angle).
// The GPU device function maps
// the input parameter to the interval 0...360
// (supposing that it is an angle).
{
while (*angle >= 360.0f)
*angle -= 360.0f;
while (*angle >= 360.0f) {
*angle -= 360.0f;
}
while (*angle < 0.0f)
*angle += 360.0f;
while (*angle < 0.0f) {
*angle += 360.0f;
}
}
// -------------------------------------------------------
//
// -------------------------------------------------------
void gpu_perform_elitist_selection(int dockpars_pop_size,
#if defined (RESTRICT_ARGS)
void gpu_perform_elitist_selection(
int dockpars_pop_size,
__global float* restrict dockpars_energies_current,
__global float* restrict dockpars_energies_next,
__global int* restrict dockpars_evals_of_new_entities,
#else
__global float* dockpars_energies_current,
__global float* dockpars_energies_next,
__global int* dockpars_evals_of_new_entities,
#endif
int dockpars_num_of_genes,
#if defined (RESTRICT_ARGS)
__global float* restrict dockpars_conformations_next,
__global const float* restrict dockpars_conformations_current
#else
__global float* dockpars_conformations_next,
__global const float* dockpars_conformations_current
#endif
,
// Some OpenCL compilers don't allow local variables to be declared outside kernels,
// so these local variables are passed in from a kernel
__local float* best_energies,
__local int* best_IDs,
__local int* best_ID
__global const float* restrict dockpars_conformations_current,
__local float* best_energies,
__local int* best_IDs,
__local int* best_ID
)
//The GPU device function performs elitist selection,
//that is, it looks for the best entity in conformations_current and
......@@ -145,68 +120,59 @@ void gpu_perform_elitist_selection(int dockpars_pop_size,
//and copies it to the place of the first entity in
//conformations_next and energies_next.
{
int entity_counter;
int gene_counter;
float best_energy;
// Some OpenCL compilers don't allow local variables to be declared outside kernels,
// so these local variables are passed in from a kernel
//__local float best_energies[NUM_OF_THREADS_PER_BLOCK];
//__local int best_IDs[NUM_OF_THREADS_PER_BLOCK];
//__local int best_ID;
if (get_local_id(0) < dockpars_pop_size)
{
if (get_local_id(0) < dockpars_pop_size) {
best_energies[get_local_id(0)] = dockpars_energies_current[get_group_id(0)+get_local_id(0)];
best_IDs[get_local_id(0)] = get_local_id(0);
}
for (entity_counter=NUM_OF_THREADS_PER_BLOCK+get_local_id(0);
entity_counter<dockpars_pop_size;
entity_counter+=NUM_OF_THREADS_PER_BLOCK)
for (entity_counter = NUM_OF_THREADS_PER_BLOCK+get_local_id(0);
entity_counter < dockpars_pop_size;
entity_counter+= NUM_OF_THREADS_PER_BLOCK) {
if (dockpars_energies_current[get_group_id(0)+entity_counter] < best_energies[get_local_id(0)])
{
if (dockpars_energies_current[get_group_id(0)+entity_counter] < best_energies[get_local_id(0)]) {
best_energies[get_local_id(0)] = dockpars_energies_current[get_group_id(0)+entity_counter];
best_IDs[get_local_id(0)] = entity_counter;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
//this could be implemented with a tree-like structure
//which may be slightly faster
// This could be implemented with a tree-like structure
// which may be slightly faster
if (get_local_id(0) == 0)
{
best_energy = best_energies[0];
//best_ID = best_IDs[0];
best_ID[0] = best_IDs[0];
for (entity_counter=1;
entity_counter<NUM_OF_THREADS_PER_BLOCK;
entity_counter++)
for (entity_counter = 1;
entity_counter < NUM_OF_THREADS_PER_BLOCK;
entity_counter++) {
if ((best_energies[entity_counter] < best_energy) && (entity_counter < dockpars_pop_size))
{
if ((best_energies[entity_counter] < best_energy) && (entity_counter < dockpars_pop_size)) {
best_energy = best_energies[entity_counter];
//best_ID = best_IDs[entity_counter];
best_ID[0] = best_IDs[entity_counter];
}
}
//setting energy value of new entity
// Setting energy value of new entity
dockpars_energies_next[get_group_id(0)] = best_energy;
//0 evals were performed for entity selected with elitism (since it was copied only)
// Zero (0) evals were performed for entity selected with elitism (since it was copied only)
dockpars_evals_of_new_entities[get_group_id(0)] = 0;
}
//now best_id stores the id of the best entity in the population,
//copying genotype and energy value to the first entity of new population
// "best_id" stores the id of the best entity in the population,
// Copying genotype and energy value to the first entity of new population
barrier(CLK_LOCAL_MEM_FENCE);
for (gene_counter=get_local_id(0);
gene_counter<dockpars_num_of_genes;
gene_counter+=NUM_OF_THREADS_PER_BLOCK)
//dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+gene_counter] = dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+GENOTYPE_LENGTH_IN_GLOBMEM*best_ID+gene_counter];
dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+gene_counter] = dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+GENOTYPE_LENGTH_IN_GLOBMEM*best_ID[0]+gene_counter];
for (gene_counter = get_local_id(0);
gene_counter < dockpars_num_of_genes;
gene_counter+= NUM_OF_THREADS_PER_BLOCK) {
dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+gene_counter] = dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0) +
GENOTYPE_LENGTH_IN_GLOBMEM*best_ID[0]+gene_counter];
}
}
// Implementation of auxiliary functions for the gradient-based minimizer
bool is_gradDescent_enabled(__local float* a_gNorm,
float gradMin_tol,
__local unsigned int* a_nIter,
unsigned int gradMin_maxiter,
__local float* a_perturbation,
__constant float* gradMin_conformation_min_perturbation,
__local bool* is_gradDescentEn,
uint gradMin_numElements)
{
bool is_gNorm_gt_gMin = (a_gNorm[0] >= gradMin_tol);
bool is_nIter_lt_maxIter = (a_nIter[0] <= gradMin_maxiter);
bool is_perturb_gt_gene_min [ACTUAL_GENOTYPE_LENGTH];
bool is_perturb_gt_genotype = true;
// Implementation of auxiliary functions
// for the gradient-based minimizer
bool is_gradDescent_enabled(
__local bool* is_gNorm_gt_gMin,
__local bool* is_nIter_lt_maxIter,
__local bool* is_perturb_gt_gene_min,
__local bool* is_perturb_gt_genotype,
__local float* local_gNorm,
float gradMin_tol,
__local uint* local_nIter,
uint gradMin_maxiter,
__local float* local_perturbation,
__constant float* gradMin_conformation_min_perturbation,
__local bool* is_gradDescentEn,
uint gradMin_numElements)
{
if (get_local_id(0) == 0) {
*is_gNorm_gt_gMin = (local_gNorm[0] >= gradMin_tol);
*is_nIter_lt_maxIter = (local_nIter[0] <= gradMin_maxiter);
*is_perturb_gt_genotype = true;
}
// For every gene, determine whether its perturbation
// is at least the minimum conformation perturbation
for(uint i=get_local_id(0);
i<gradMin_numElements;
i+=NUM_OF_THREADS_PER_BLOCK) {
is_perturb_gt_gene_min[i] = (a_perturbation[i] >= gradMin_conformation_min_perturbation[i]);
for(uint i = get_local_id(0);
i < gradMin_numElements;
i+= NUM_OF_THREADS_PER_BLOCK) {
is_perturb_gt_gene_min[i] = (local_perturbation[i] >= gradMin_conformation_min_perturbation[i]);
}
barrier(CLK_LOCAL_MEM_FENCE);
// Reduce all is_perturb_gt_gene_min's
// into their corresponding genotype
for(uint i=get_local_id(0);
i<gradMin_numElements;
i+=NUM_OF_THREADS_PER_BLOCK) {
is_perturb_gt_genotype = is_perturb_gt_genotype && is_perturb_gt_gene_min[i];
}
if (get_local_id(0) == 0) {
// Reduce all is_perturb_gt_gene_min's
// into their corresponding genotype
for(uint i = 0;
i < gradMin_numElements;
i++) {
*is_perturb_gt_genotype = *is_perturb_gt_genotype && is_perturb_gt_gene_min[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
// Reduce all three previous
// partial evaluations (gNorm, nIter, perturb) into a final one
if (get_local_id(0) == 0) {
is_gradDescentEn[0] = is_gNorm_gt_gMin && is_nIter_lt_maxIter && is_perturb_gt_genotype;
}
// Reduce all three previous
// partial evaluations (gNorm, nIter, perturb) into a final one
is_gradDescentEn[0] = *is_gNorm_gt_gMin && *is_nIter_lt_maxIter && *is_perturb_gt_genotype;
}
barrier(CLK_LOCAL_MEM_FENCE);
......@@ -47,81 +52,72 @@ bool is_gradDescent_enabled(__local float* a_gNorm,
}
void stepGPU (// Args for minimization
__local float* local_genotype, // originally as "d_x"
__local float* local_genotype_new, // originally as "d_xnew"
__local float* local_genotype_diff, // originally as "d_xdiff"
__local float* local_gradient, // originally as "d_g"
float gradMin_alpha, // originally as "alpha"
float gradMin_h, // originally as "h"
unsigned int gradMin_inputSize, // originally as "M". initially labelled as "gradMin_M"
__local float* local_genotype, // originally as "d_x"
__local float* local_genotype_new, // originally as "d_xnew"
__local float* local_genotype_diff, // originally as "d_xdiff"
__local float* local_gradient, // originally as "d_g"
float gradMin_alpha, // originally as "alpha"
float gradMin_h, // originally as "h"
uint gradMin_inputSize, // originally as "M". initially labelled as "gradMin_M"
// Args for energy and gradient calculation
int dockpars_rotbondlist_length,
char dockpars_num_of_atoms,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
#if defined (RESTRICT_ARGS)
__global const float* restrict dockpars_fgrids, // cannot be allocated in __constant (too large)
#else
__global const float* dockpars_fgrids, // cannot be allocated in __constant (too large)
#endif
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
float dockpars_grid_spacing,
float dockpars_coeff_elec,
float dockpars_qasp,
float dockpars_coeff_desolv,
__local float* genotype,
__local float* energy,
__local int* run_id,
int dockpars_rotbondlist_length,
char dockpars_num_of_atoms,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
__global const float* restrict dockpars_fgrids, // This is too large to be allocated in __constant
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
float dockpars_grid_spacing,
float dockpars_coeff_elec,
float dockpars_qasp,
float dockpars_coeff_desolv,
__local float* genotype,
__local float* energy,
__local int* run_id,
// Some OpenCL compilers don't allow declaring
// local variables within non-kernel functions.
// These local variables must be declared in a kernel,
// and then passed to non-kernel functions.
__local float* calc_coords_x,
__local float* calc_coords_y,
__local float* calc_coords_z,
__local float* partial_energies,
__constant float* atom_charges_const,
__constant char* atom_types_const,
__constant char* intraE_contributors_const,
__constant float* VWpars_AC_const,
__constant float* VWpars_BD_const,
__constant float* dspars_S_const,
__constant float* dspars_V_const,
__constant int* rotlist_const,
__constant float* ref_coords_x_const,
__constant float* ref_coords_y_const,
__constant float* ref_coords_z_const,
__constant float* rotbonds_moving_vectors_const,
__constant float* rotbonds_unit_vectors_const,
__constant float* ref_orientation_quats_const
// -------------------------------------------------------------------
// L30nardoSV
// Gradient-related arguments
// Calculate gradients (forces) for intermolecular energy
// Derived from autodockdev/maps.py
// -------------------------------------------------------------------
__local float* calc_coords_x,
__local float* calc_coords_y,
__local float* calc_coords_z,
__local float* partial_energies,
__constant float* atom_charges_const,
__constant char* atom_types_const,
__constant char* intraE_contributors_const,
__constant float* VWpars_AC_const,
__constant float* VWpars_BD_const,
__constant float* dspars_S_const,
__constant float* dspars_V_const,
__constant int* rotlist_const,
__constant float* ref_coords_x_const,
__constant float* ref_coords_y_const,
__constant float* ref_coords_z_const,
__constant float* rotbonds_moving_vectors_const,
__constant float* rotbonds_unit_vectors_const,
__constant float* ref_orientation_quats_const
// Gradient-related arguments
// Calculate gradients (forces) for intermolecular energy
// Derived from autodockdev/maps.py
// "is_enabled_gradient_calc": enables gradient calculation.
// In Genetic-Generation: no need for gradients
// In Gradient-Minimizer: must calculate gradients
,
__local bool* is_enabled_gradient_calc,
__local float* gradient_inter_x,
__local float* gradient_inter_y,
__local float* gradient_inter_z,
__local float* gradient_genotype
// "is_enabled_gradient_calc": enables gradient calculation.
// In Genetic-Generation: no need for gradients
// In Gradient-Minimizer: must calculate gradients
,
__local bool* is_enabled_gradient_calc,
__local float* gradient_inter_x,
__local float* gradient_inter_y,
__local float* gradient_inter_z,
__local float* gradient_genotype
)
{
// Calculate gradient
// =============================================================
gpu_calc_energy(dockpars_rotbondlist_length,
dockpars_num_of_atoms,
......@@ -161,85 +157,60 @@ void stepGPU (// Args for minimization
rotbonds_moving_vectors_const,
rotbonds_unit_vectors_const,
ref_orientation_quats_const
// -------------------------------------------------------------------
// L30nardoSV
// Gradient-related arguments
// Calculate gradients (forces) for intermolecular energy
// Derived from autodockdev/maps.py
// -------------------------------------------------------------------
,
is_enabled_gradient_calc,
gradient_inter_x,
gradient_inter_y,
gradient_inter_z,
gradient_genotype
);
// -------------------------------------------------------------------
// =============================================================
for(uint i = get_local_id(0);
i < gradMin_inputSize;
i+= NUM_OF_THREADS_PER_BLOCK) {
// TODO: Transform gradients_inter_{x|y|z}
// into local_gradients[i] (with four quaternion genes)
// Derived from autodockdev/motions.py/forces_to_delta_genes()
// TODO: Transform local_gradients[i] (with four quaternion genes)
// into local_gradients[i] (with three Shoemake genes)
// Derived from autodockdev/motions.py/_get_cube3_gradient()
for(unsigned int i=get_local_id(0);
i<gradMin_inputSize;
i+=NUM_OF_THREADS_PER_BLOCK) {
// Take step
// FIXME: add conditional evaluation of max grad
// Taking step
local_genotype_new[i] = local_genotype[i] - gradMin_alpha * local_gradient[i];
// Update termination metrics
// Updating termination metrics
local_genotype_diff[i] = local_genotype_new[i] - local_genotype[i];
// Update current solution
// Updating current solution
local_genotype[i] = local_genotype_new[i];
}
}
float inner_product(__local float* vector1,
__local float* vector2,
uint inputSize,
__local float* init) {
float inner_product(__local float* vector1,
__local float* vector2,
unsigned int inputSize,
__local float* init) {
float temp = 0.0f;
if(get_local_id(0) == 0) {
init[0] = 0.0f;
}
// Element-wise multiplication
for(uint i = get_local_id(0);
i < inputSize;
i+= NUM_OF_THREADS_PER_BLOCK) {
init[i] = vector1[i] * vector2[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
for(unsigned int i=get_local_id(0);
i<inputSize;
i+=NUM_OF_THREADS_PER_BLOCK) {
init[0] += vector1[i] * vector2[i];
}
// Accumulating dot product
if(get_local_id(0) == 0) {
for(uint i = 0;
i < inputSize;
i ++) {
temp += init[i];
}
init [0] = temp;
}
barrier(CLK_LOCAL_MEM_FENCE);
return init[0];
}
// Implementation of gradient calculator
// Originally written in Python by Diogo Martins
// Initially coded within gpu_calc_energy()
......@@ -26,50 +26,47 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
// All related pragmas are in defines.h (accessible by host and device code)
void gpu_calc_energy( int dockpars_rotbondlist_length,
char dockpars_num_of_atoms,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
#if defined (RESTRICT_ARGS)