Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
docking
ocladock
Commits
d266192b
Commit
d266192b
authored
Apr 09, 2018
by
Leonardo Solis
Browse files
cleaned up and minor corrections
parent
cd23a9ad
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
d266192b
...
...
@@ -199,7 +199,7 @@ odock: check-env-all stringify $(SRC)
# Example
PDB
:=
3ce3
NRUN
:=
100
POPSIZE
:=
1
50
POPSIZE
:=
50
0
TESTNAME
:=
test
test
:
odock
...
...
common/defines.h
View file @
d266192b
...
...
@@ -39,26 +39,24 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define NUM_OF_THREADS_PER_BLOCK 64
#endif
#define MAX_NUM_OF_ATOMS
256
#define MAX_NUM_OF_ATYPES
14
#define MAX_NUM_OF_ROTBONDS
32
#define MAX_INTRAE_CONTRIBUTORS MAX_NUM_OF_ATOMS * MAX_NUM_OF_ATOMS
#define MAX_NUM_OF_ROTATIONS
MAX_NUM_OF_ATOMS * MAX_NUM_OF_ROTBONDS
#define MAX_POPSIZE
2048
#define MAX_NUM_OF_RUNS
1000
#define MAX_NUM_OF_ATOMS 256
#define MAX_NUM_OF_ATYPES 14
#define MAX_NUM_OF_ROTBONDS 32
#define MAX_INTRAE_CONTRIBUTORS
(
MAX_NUM_OF_ATOMS * MAX_NUM_OF_ATOMS
)
#define MAX_NUM_OF_ROTATIONS
(
MAX_NUM_OF_ATOMS * MAX_NUM_OF_ROTBONDS
)
#define MAX_POPSIZE 2048
#define MAX_NUM_OF_RUNS 1000
// Must be bigger than MAX_NUM_OF_ROTBONDS+6
#define GENOTYPE_LENGTH_IN_GLOBMEM 64
#define ACTUAL_GENOTYPE_LENGTH (MAX_NUM_OF_ROTBONDS+6)
#define LS_EXP_FACTOR
2.0f
#define LS_CONT_FACTOR
0.5f
#define LS_EXP_FACTOR 2.0f
#define LS_CONT_FACTOR 0.5f
// Improvements over Pechan's implementation
#define NATIVE_PRECISION
#define ASYNC_COPY
#define IMPROVE_GRID
#define RESTRICT_ARGS
#define MAPPED_COPY
#endif
/* DEFINES_H_ */
device/auxiliary_genetic.cl
View file @
d266192b
...
...
@@ -25,12 +25,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//
-------------------------------------------------------
//
//
-------------------------------------------------------
unsigned
int
gpu_rand
(
#
if
defined
(
RESTRICT_ARGS
)
__global
unsigned
int*
restrict
prng_states
#
else
__global
unsigned
int*
prng_states
#
endif
uint
gpu_rand
(
__global
uint*
restrict
prng_states
)
//The
GPU
device
function
generates
a
random
int
//with
a
linear
congruential
generator.
...
...
@@ -39,20 +35,20 @@ unsigned int gpu_rand(
//prng_states
(
thread
with
ID
tx
in
block
with
ID
bx
stores
its
state
in
prng_states[bx*NUM_OF_THREADS_PER_BLOCK+$
//The
random
number
generator
uses
the
gcc
linear
congruential
generator
constants.
{
unsigned
int
state
;
u
int
state
;
#
if
defined
(
REPRO
)
state
=
1
;
#
else
//
c
urrent
state
of
the
threads
own
PRNG
//state
=
prng_states[get_group_id
(
0
)
*NUM_OF_THREADS_PER_BLOCK
+
get_local_id
(
0
)
]
;
//
C
urrent
state
of
the
threads
own
PRNG
//
state
=
prng_states[get_group_id
(
0
)
*NUM_OF_THREADS_PER_BLOCK
+
get_local_id
(
0
)
]
;
state
=
prng_states[get_global_id
(
0
)
]
;
//c
alculating
next
state
state
=
(
RAND_A*state+RAND_C
)
;
//
C
alculating
next
state
state
=
(
RAND_A*state+RAND_C
)
;
#
endif
//
s
aving
next
state
to
memory
//prng_states[get_group_id
(
0
)
*NUM_OF_THREADS_PER_BLOCK
+
get_local_id
(
0
)
]
=
state
;
//
S
aving
next
state
to
memory
//
prng_states[get_group_id
(
0
)
*NUM_OF_THREADS_PER_BLOCK
+
get_local_id
(
0
)
]
=
state
;
prng_states[get_global_id
(
0
)
]
=
state
;
return
state
;
...
...
@@ -62,19 +58,15 @@ unsigned int gpu_rand(
//
//
-------------------------------------------------------
float
gpu_randf
(
#
if
defined
(
RESTRICT_ARGS
)
__global
unsigned
int*
restrict
prng_states
#
else
__global
unsigned
int*
prng_states
#
endif
__global
uint*
restrict
prng_states
)
//The
GPU
device
function
generates
a
//random
float
greater
than
(
or
equal
to
)
0
and
less
than
1.
//It
uses
gpu_rand
()
function.
{
float
state
;
float
state
;
//
s
tate
will
be
between
0
and
1
//
S
tate
will
be
between
0
and
1
#
if
defined
(
REPRO
)
state
=
0.55f
; //0.55f;
#
else
...
...
@@ -94,50 +86,33 @@ float gpu_randf(
//
//
-------------------------------------------------------
void
map_angle
(
__local
float*
angle
)
//The
GPU
device
function
maps
//the
input
parameter
to
the
interval
0...360
//
(
supposing
that
it
is
an
angle
)
.
//
The
GPU
device
function
maps
//
the
input
parameter
to
the
interval
0...360
//
(
supposing
that
it
is
an
angle
)
.
{
while
(
*angle
>=
360.0f
)
*angle
-=
360.0f
;
while
(
*angle
>=
360.0f
)
{
*angle
-=
360.0f
;
}
while
(
*angle
<
0.0f
)
*angle
+=
360.0f
;
while
(
*angle
<
0.0f
)
{
*angle
+=
360.0f
;
}
}
//
-------------------------------------------------------
//
//
-------------------------------------------------------
void
gpu_perform_elitist_selection
(
int
dockpars_pop_size,
#
if
defined
(
RESTRICT_ARGS
)
void
gpu_perform_elitist_selection
(
int
dockpars_pop_size,
__global
float*
restrict
dockpars_energies_current,
__global
float*
restrict
dockpars_energies_next,
__global
int*
restrict
dockpars_evals_of_new_entities,
#
else
__global
float*
dockpars_energies_current,
__global
float*
dockpars_energies_next,
__global
int*
dockpars_evals_of_new_entities,
#
endif
int
dockpars_num_of_genes,
#
if
defined
(
RESTRICT_ARGS
)
__global
float*
restrict
dockpars_conformations_next,
__global
const
float*
restrict
dockpars_conformations_current
#
else
__global
float*
dockpars_conformations_next,
__global
const
float*
dockpars_conformations_current
#
endif
,
//
Some
OpenCL
compilers
don
't
allow
local
var
outside
kernels
//
so
this
local
vars
are
passed
from
a
kernel
__local
float*
best_energies,
__local
int*
best_IDs,
__local
int*
best_ID
__global
const
float*
restrict
dockpars_conformations_current,
__local
float*
best_energies,
__local
int*
best_IDs,
__local
int*
best_ID
)
//The
GPU
device
function
performs
elitist
selection,
//that
is,
it
looks
for
the
best
entity
in
conformations_current
and
...
...
@@ -145,68 +120,59 @@ void gpu_perform_elitist_selection(int dockpars_pop_size,
//and
copies
it
to
the
place
of
the
first
entity
in
//conformations_next
and
energies_next.
{
int
entity_counter
;
int
gene_counter
;
float
best_energy
;
//
Some
OpenCL
compilers
don
't
allow
local
var
outside
kernels
//
so
this
local
vars
are
passed
from
a
kernel
//__local
float
best_energies[NUM_OF_THREADS_PER_BLOCK]
;
//__local
int
best_IDs[NUM_OF_THREADS_PER_BLOCK]
;
//__local
int
best_ID
;
if
(
get_local_id
(
0
)
<
dockpars_pop_size
)
{
if
(
get_local_id
(
0
)
<
dockpars_pop_size
)
{
best_energies[get_local_id
(
0
)
]
=
dockpars_energies_current[get_group_id
(
0
)
+get_local_id
(
0
)
]
;
best_IDs[get_local_id
(
0
)
]
=
get_local_id
(
0
)
;
}
for
(
entity_counter
=
NUM_OF_THREADS_PER_BLOCK+get_local_id
(
0
)
;
entity_counter
<
dockpars_pop_size
;
entity_counter+=NUM_OF_THREADS_PER_BLOCK
)
for
(
entity_counter
=
NUM_OF_THREADS_PER_BLOCK+get_local_id
(
0
)
;
entity_counter
<
dockpars_pop_size
;
entity_counter+=
NUM_OF_THREADS_PER_BLOCK
)
{
if
(
dockpars_energies_current[get_group_id
(
0
)
+entity_counter]
<
best_energies[get_local_id
(
0
)
]
)
{
if
(
dockpars_energies_current[get_group_id
(
0
)
+entity_counter]
<
best_energies[get_local_id
(
0
)
]
)
{
best_energies[get_local_id
(
0
)
]
=
dockpars_energies_current[get_group_id
(
0
)
+entity_counter]
;
best_IDs[get_local_id
(
0
)
]
=
entity_counter
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
t
his
could
be
implemented
with
a
tree-like
structure
//which
may
be
slightly
faster
//
T
his
could
be
implemented
with
a
tree-like
structure
//
which
may
be
slightly
faster
if
(
get_local_id
(
0
)
==
0
)
{
best_energy
=
best_energies[0]
;
//best_ID
=
best_IDs[0]
;
best_ID[0]
=
best_IDs[0]
;
for
(
entity_counter
=
1
;
entity_counter
<
NUM_OF_THREADS_PER_BLOCK
;
entity_counter++
)
for
(
entity_counter
=
1
;
entity_counter
<
NUM_OF_THREADS_PER_BLOCK
;
entity_counter++
)
{
if
((
best_energies[entity_counter]
<
best_energy
)
&&
(
entity_counter
<
dockpars_pop_size
))
{
if
((
best_energies[entity_counter]
<
best_energy
)
&&
(
entity_counter
<
dockpars_pop_size
))
{
best_energy
=
best_energies[entity_counter]
;
//best_ID
=
best_IDs[entity_counter]
;
best_ID[0]
=
best_IDs[entity_counter]
;
}
}
//
s
etting
energy
value
of
new
entity
//
S
etting
energy
value
of
new
entity
dockpars_energies_next[get_group_id
(
0
)
]
=
best_energy
;
//
0
evals
were
performed
for
entity
selected
with
elitism
(
since
it
was
copied
only
)
//
Zero
(
0
)
evals
were
performed
for
entity
selected
with
elitism
(
since
it
was
copied
only
)
dockpars_evals_of_new_entities[get_group_id
(
0
)
]
=
0
;
}
//
now
best_id
stores
the
id
of
the
best
entity
in
the
population,
//
c
opying
genotype
and
energy
value
to
the
first
entity
of
new
population
//
"
best_id
"
stores
the
id
of
the
best
entity
in
the
population,
//
C
opying
genotype
and
energy
value
to
the
first
entity
of
new
population
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
gene_counter=get_local_id
(
0
)
;
gene_counter<dockpars_num_of_genes
;
gene_counter+=NUM_OF_THREADS_PER_BLOCK
)
//dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+gene_counter]
=
dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+GENOTYPE_LENGTH_IN_GLOBMEM*best_ID+gene_counter]
;
dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+gene_counter]
=
dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+GENOTYPE_LENGTH_IN_GLOBMEM*best_ID[0]+gene_counter]
;
for
(
gene_counter
=
get_local_id
(
0
)
;
gene_counter
<
dockpars_num_of_genes
;
gene_counter+=
NUM_OF_THREADS_PER_BLOCK
)
{
dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+gene_counter]
=
dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+
GENOTYPE_LENGTH_IN_GLOBMEM*best_ID[0]+gene_counter]
;
}
}
device/auxiliary_gradient.cl
View file @
d266192b
//
Implementation
of
auxiliary
functions
for
the
gradient-based
minimizer
bool
is_gradDescent_enabled
(
__local
float*
a_gNorm,
float
gradMin_tol,
__local
unsigned
int*
a_nIter,
unsigned
int
gradMin_maxiter,
__local
float*
a_perturbation,
__constant
float*
gradMin_conformation_min_perturbation,
__local
bool*
is_gradDescentEn,
uint
gradMin_numElements
)
{
bool
is_gNorm_gt_gMin
=
(
a_gNorm[0]
>=
gradMin_tol
)
;
bool
is_nIter_lt_maxIter
=
(
a_nIter[0]
<=
gradMin_maxiter
)
;
bool
is_perturb_gt_gene_min
[ACTUAL_GENOTYPE_LENGTH]
;
bool
is_perturb_gt_genotype
=
true
;
//
Implementation
of
auxiliary
functions
//
for
the
gradient-based
minimizer
bool
is_gradDescent_enabled
(
__local
bool*
is_gNorm_gt_gMin,
__local
bool*
is_nIter_lt_maxIter,
__local
bool*
is_perturb_gt_gene_min,
__local
bool*
is_perturb_gt_genotype,
__local
float*
local_gNorm,
float
gradMin_tol,
__local
uint*
local_nIter,
uint
gradMin_maxiter,
__local
float*
local_perturbation,
__constant
float*
gradMin_conformation_min_perturbation,
__local
bool*
is_gradDescentEn,
uint
gradMin_numElements
)
{
if
(
get_local_id
(
0
)
==
0
)
{
*is_gNorm_gt_gMin
=
(
local_gNorm[0]
>=
gradMin_tol
)
;
*is_nIter_lt_maxIter
=
(
local_nIter[0]
<=
gradMin_maxiter
)
;
*is_perturb_gt_genotype
=
true
;
}
//
For
every
gene,
let
's
determine
//
if
perturbation
is
greater
than
min
conformation
for
(
uint
i
=
get_local_id
(
0
)
;
i
<
gradMin_numElements
;
i+=NUM_OF_THREADS_PER_BLOCK
)
{
is_perturb_gt_gene_min[i]
=
(
a
_perturbation[i]
>=
gradMin_conformation_min_perturbation[i]
)
;
for
(
uint
i
=
get_local_id
(
0
)
;
i
<
gradMin_numElements
;
i+=
NUM_OF_THREADS_PER_BLOCK
)
{
is_perturb_gt_gene_min[i]
=
(
local
_perturbation[i]
>=
gradMin_conformation_min_perturbation[i]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Reduce
all
is_perturb_gt_gene_min
's
//
into
their
corresponding
genotype
for
(
uint
i=get_local_id
(
0
)
;
i<gradMin_numElements
;
i+=NUM_OF_THREADS_PER_BLOCK
)
{
is_perturb_gt_genotype
=
is_perturb_gt_genotype
&&
is_perturb_gt_gene_min[i]
;
}
if
(
get_local_id
(
0
)
==
0
)
{
//
Reduce
all
is_perturb_gt_gene_min
's
//
into
their
corresponding
genotype
for
(
uint
i
=
0
;
i
<
gradMin_numElements
;
i++
)
{
*is_perturb_gt_genotype
=
*is_perturb_gt_genotype
&&
is_perturb_gt_gene_min[i]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Reduce
all
three
previous
//
partial
evaluations
(
gNorm,
nIter,
perturb
)
into
a
final
one
if
(
get_local_id
(
0
)
==
0
)
{
is_gradDescentEn[0]
=
is_gNorm_gt_gMin
&&
is_nIter_lt_maxIter
&&
is_perturb_gt_genotype
;
}
//
Reduce
all
three
previous
//
partial
evaluations
(
gNorm,
nIter,
perturb
)
into
a
final
one
is_gradDescentEn[0]
=
*is_gNorm_gt_gMin
&&
*is_nIter_lt_maxIter
&&
*is_perturb_gt_genotype
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -47,81 +52,72 @@ bool is_gradDescent_enabled(__local float* a_gNorm,
}
void
stepGPU
(
//
Args
for
minimization
__local
float*
local_genotype,
//
originally
as
"d_x"
__local
float*
local_genotype_new,
//
originally
as
"d_xnew"
__local
float*
local_genotype_diff,
//
originally
as
"d_xdiff"
__local
float*
local_gradient,
//
originally
as
"d_g"
float
gradMin_alpha,
//
originally
as
"alpha"
float
gradMin_h,
//
originally
as
"h"
u
nsigned
int
gradMin_inputSize,
//
originally
as
"M"
.
initially
labelled
as
"gradMin_M"
__local
float*
local_genotype,
//
originally
as
"d_x"
__local
float*
local_genotype_new,
//
originally
as
"d_xnew"
__local
float*
local_genotype_diff,
//
originally
as
"d_xdiff"
__local
float*
local_gradient,
//
originally
as
"d_g"
float
gradMin_alpha,
//
originally
as
"alpha"
float
gradMin_h,
//
originally
as
"h"
uint
gradMin_inputSize,
//
originally
as
"M"
.
initially
labelled
as
"gradMin_M"
//
Args
for
energy
and
gradient
calculation
int
dockpars_rotbondlist_length,
char
dockpars_num_of_atoms,
char
dockpars_gridsize_x,
char
dockpars_gridsize_y,
char
dockpars_gridsize_z,
#
if
defined
(
RESTRICT_ARGS
)
__global
const
float*
restrict
dockpars_fgrids,
//
cannot
be
allocated
in
__constant
(
too
large
)
#
else
__global
const
float*
dockpars_fgrids,
//
cannot
be
allocated
in
__constant
(
too
large
)
#
endif
char
dockpars_num_of_atypes,
int
dockpars_num_of_intraE_contributors,
float
dockpars_grid_spacing,
float
dockpars_coeff_elec,
float
dockpars_qasp,
float
dockpars_coeff_desolv,
__local
float*
genotype,
__local
float*
energy,
__local
int*
run_id,
int
dockpars_rotbondlist_length,
char
dockpars_num_of_atoms,
char
dockpars_gridsize_x,
char
dockpars_gridsize_y,
char
dockpars_gridsize_z,
__global
const
float*
restrict
dockpars_fgrids,
//
This
is
too
large
to
be
allocated
in
__constant
char
dockpars_num_of_atypes,
int
dockpars_num_of_intraE_contributors,
float
dockpars_grid_spacing,
float
dockpars_coeff_elec,
float
dockpars_qasp,
float
dockpars_coeff_desolv,
__local
float*
genotype,
__local
float*
energy,
__local
int*
run_id,
//
Some
OpenCL
compilers
don
't
allow
declaring
//
local
variables
within
non-kernel
functions.
//
These
local
variables
must
be
declared
in
a
kernel,
//
and
then
passed
to
non-kernel
functions.
__local
float*
calc_coords_x,
__local
float*
calc_coords_y,
__local
float*
calc_coords_z,
__local
float*
partial_energies,
__constant
float*
atom_charges_const,
__constant
char*
atom_types_const,
__constant
char*
intraE_contributors_const,
__constant
float*
VWpars_AC_const,
__constant
float*
VWpars_BD_const,
__constant
float*
dspars_S_const,
__constant
float*
dspars_V_const,
__constant
int*
rotlist_const,
__constant
float*
ref_coords_x_const,
__constant
float*
ref_coords_y_const,
__constant
float*
ref_coords_z_const,
__constant
float*
rotbonds_moving_vectors_const,
__constant
float*
rotbonds_unit_vectors_const,
__constant
float*
ref_orientation_quats_const
//
-------------------------------------------------------------------
//
L30nardoSV
//
Gradient-related
arguments
//
Calculate
gradients
(
forces
)
for
intermolecular
energy
//
Derived
from
autodockdev/maps.py
//
-------------------------------------------------------------------
__local
float*
calc_coords_x,
__local
float*
calc_coords_y,
__local
float*
calc_coords_z,
__local
float*
partial_energies,
__constant
float*
atom_charges_const,
__constant
char*
atom_types_const,
__constant
char*
intraE_contributors_const,
__constant
float*
VWpars_AC_const,
__constant
float*
VWpars_BD_const,
__constant
float*
dspars_S_const,
__constant
float*
dspars_V_const,
__constant
int*
rotlist_const,
__constant
float*
ref_coords_x_const,
__constant
float*
ref_coords_y_const,
__constant
float*
ref_coords_z_const,
__constant
float*
rotbonds_moving_vectors_const,
__constant
float*
rotbonds_unit_vectors_const,
__constant
float*
ref_orientation_quats_const
//
Gradient-related
arguments
//
Calculate
gradients
(
forces
)
for
intermolecular
energy
//
Derived
from
autodockdev/maps.py
//
"is_enabled_gradient_calc"
:
enables
gradient
calculation.
//
In
Genetic-Generation:
no
need
for
gradients
//
In
Gradient-Minimizer:
must
calculate
gradients
,
__local
bool*
is_enabled_gradient_calc,
__local
float*
gradient_inter_x,
__local
float*
gradient_inter_y,
__local
float*
gradient_inter_z,
__local
float*
gradient_genotype
//
"is_enabled_gradient_calc"
:
enables
gradient
calculation.
//
In
Genetic-Generation:
no
need
for
gradients
//
In
Gradient-Minimizer:
must
calculate
gradients
,
__local
bool*
is_enabled_gradient_calc,
__local
float*
gradient_inter_x,
__local
float*
gradient_inter_y,
__local
float*
gradient_inter_z,
__local
float*
gradient_genotype
)
{
//
Calculate
gradient
//
=============================================================
gpu_calc_energy
(
dockpars_rotbondlist_length,
dockpars_num_of_atoms,
...
...
@@ -161,85 +157,60 @@ void stepGPU (// Args for minimization
rotbonds_moving_vectors_const,
rotbonds_unit_vectors_const,
ref_orientation_quats_const
//
-------------------------------------------------------------------
//
L30nardoSV
//
Gradient-related
arguments
//
Calculate
gradients
(
forces
)
for
intermolecular
energy
//
Derived
from
autodockdev/maps.py
//
-------------------------------------------------------------------
,
is_enabled_gradient_calc,
gradient_inter_x,
gradient_inter_y,
gradient_inter_z,
gradient_genotype
)
;
//
-------------------------------------------------------------------
//
=============================================================
for
(
uint
i
=
get_local_id
(
0
)
;
i
<
gradMin_inputSize
;
i+=
NUM_OF_THREADS_PER_BLOCK
)
{
//
TODO:
Transform
gradients_inter_{x|y|z}
//
into
local_gradients[i]
(
with
four
quaternion
genes
)
//
Derived
from
autodockdev/motions.py/forces_to_delta_genes
()
//
TODO:
Transform
local_gradients[i]
(
with
four
quaternion
genes
)
//
into
local_gradients[i]
(
with
three
Shoemake
genes
)
//
Derived
from
autodockdev/motions.py/_get_cube3_gradient
()
for
(
unsigned
int
i=get_local_id
(
0
)
;
i<gradMin_inputSize
;
i+=NUM_OF_THREADS_PER_BLOCK
)
{
//
Take
step
//
FIXME:
add
conditional
evaluation
of
max
grad
//
Taking
step
local_genotype_new[i]
=
local_genotype[i]
-
gradMin_alpha
*
local_gradient[i]
;
//
Updat
e
termination
metrics
//
Updat
ing
termination
metrics
local_genotype_diff[i]
=
local_genotype_new[i]
-
local_genotype[i]
;
//
Updat
e
current
solution
//
Updat
ing
current
solution
local_genotype[i]
=
local_genotype_new[i]
;
}
}
float
inner_product
(
__local
float*
vector1,
__local
float*
vector2,
uint
inputSize,
__local
float*
init
)
{
float
inner_product
(
__local
float*
vector1,
__local
float*
vector2,
unsigned
int
inputSize,
__local
float*
init
)
{
float
temp
=
0.0f
;
if
(
get_local_id
(
0
)
==
0
)
{
init[0]
=
0.0f
;
}
//
Element-wise
multiplication
for
(
uint
i
=
get_local_id
(
0
)
;
i
<
inputSize
;
i+=
NUM_OF_THREADS_PER_BLOCK
)
{
init[i]
=
vector1[i]
*
vector2[i]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
unsigned
int
i=get_local_id
(
0
)
;
i<inputSize
;
i+=NUM_OF_THREADS_PER_BLOCK
)
{
init[0]
+=
vector1[i]
*
vector2[i]
;
}
//
Accumulating
dot
product
if
(
get_local_id
(
0
)
==
0
)
{
for
(
uint
i
=
0
;
i
<
inputSize
;
i
++
)
{
temp
+=
init[i]
;
}
init
[0]
=
temp
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
return
init[0]
;
}
//
Implementation
of
gradient
calculator
//
Originally
written
in
Python
by
Diogo
Martins
//
Initially
coded
within
gpu_calc_energy
()
device/calcenergy.cl
View file @
d266192b
...
...
@@ -26,50 +26,47 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//
All
related
pragmas
are
in
defines.h
(
accesible
by
host
and
device
code
)
void
gpu_calc_energy
(
int
dockpars_rotbondlist_length,
char
dockpars_num_of_atoms,
char
dockpars_gridsize_x,
char
dockpars_gridsize_y,
char
dockpars_gridsize_z,