Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
docking
ocladock
Commits
fbf5743a
Commit
fbf5743a
authored
Mar 13, 2019
by
Leonardo Solis
Browse files
merging fastergrad onto master
parents
ebf9a4b0
2ce54688
Changes
100
Expand all
Hide whitespace changes
Inline
Side-by-side
Makefile
View file @
fbf5743a
...
...
@@ -50,7 +50,10 @@ K1_NAME="gpu_calc_initpop"
K2_NAME
=
"gpu_sum_evals"
K3_NAME
=
"perform_LS"
K4_NAME
=
"gpu_gen_and_eval_newpops"
K_NAMES
=
-DK1
=
$(K1_NAME)
-DK2
=
$(K2_NAME)
-DK3
=
$(K3_NAME)
-DK4
=
$(K4_NAME)
K5_NAME
=
"gradient_minSD"
K6_NAME
=
"gradient_minFire"
K7_NAME
=
"gradient_minAD"
K_NAMES
=
-DK1
=
$(K1_NAME)
-DK2
=
$(K2_NAME)
-DK3
=
$(K3_NAME)
-DK4
=
$(K4_NAME)
-DK5
=
$(K5_NAME)
-DK6
=
$(K6_NAME)
-DK7
=
$(K7_NAME)
# Kernel flags
KFLAGS
=
-DKRNL_SOURCE
=
$(KRNL_DIR)
/
$(KRNL_MAIN)
-DKRNL_DIRECTORY
=
$(KRNL_DIR)
-DKCMN_DIRECTORY
=
$(KCMN_DIR)
$(K_NAMES)
...
...
@@ -95,8 +98,10 @@ else
endif
# ------------------------------------------------------
# Configuration (Host)
# Valid values: RELEASE, DEBUG
# Configuration
# FDEBUG (full) : enables debugging on both host + device
# LDEBUG (light): enables debugging on host
# RELEASE
CONFIG
=
RELEASE
OCL_DEBUG_BASIC
=
-DPLATFORM_ATTRIBUTES_DISPLAY
\
...
...
@@ -113,7 +118,9 @@ OCL_DEBUG_ALL=$(OCL_DEBUG_BASIC) \
-DKERNEL_WORK_GROUP_INFO_DISPLAY
\
-DBUFFER_OBJECT_INFO_DISPLAY
ifeq
($(CONFIG),DEBUG)
ifeq
($(CONFIG),FDEBUG)
OPT
=
-O0
-g3
-Wall
$(OCL_DEBUG_ALL)
-DDOCK_DEBUG
else
ifeq
($(CONFIG),LDEBUG)
OPT
=
-O0
-g3
-Wall
$(OCL_DEBUG_BASIC)
else
ifeq
($(CONFIG),RELEASE)
OPT
=
-O3
...
...
@@ -122,18 +129,9 @@ else
endif
# ------------------------------------------------------
# Host and Device Debug
DOCK_DEBUG
=
NO
# Reproduce results (remove randomness)
REPRO
=
NO
ifeq
($(DOCK_DEBUG),YES)
DD
=
-DDOCK_DEBUG
else
DD
=
endif
ifeq
($(REPRO),YES)
REP
=
-DREPRO
else
...
...
@@ -188,11 +186,66 @@ check-env-gpu:
check-env-all
:
check-env-dev check-env-cpu check-env-gpu
# ------------------------------------------------------
# Priting out its git version hash
GIT_VERSION
:=
$(
shell
git describe
--abbrev
=
40
--dirty
--always
--tags
)
CFLAGS
+=
-DVERSION
=
\"
$(GIT_VERSION)
\"
# ------------------------------------------------------
stringify
:
./stringify_ocl_krnls.sh
odock
:
check-env-all stringify $(SRC)
g++
$(SRC)
$(CFLAGS)
-lOpenCL
-o
$(BIN_DIR)
/
$(TARGET)
$(DEV)
$(NWI)
$(OPT)
$(DD)
$(REP)
$(KFLAGS)
g++
\
$(SRC)
\
$(CFLAGS)
\
-lOpenCL
\
-o
$(BIN_DIR)
/
$(TARGET)
\
$(DEV)
$(NWI)
$(OPT)
$(DD)
$(REP)
$(KFLAGS)
# Example
# 1ac8: for testing gradients of translation and rotation genes
# 7cpa: for testing gradients of torsion genes (15 torsions)
# 3tmn: for testing gradients of torsion genes (1 torsion)
PDB
:=
3ce3
NRUN
:=
100
NGEN
:=
27000
POPSIZE
:=
150
TESTNAME
:=
test
TESTLS
:=
sw
test
:
odock
$(BIN_DIR)
/
$(TARGET)
\
-ffile
./input/
$(PDB)
/derived/
$(PDB)
_protein.maps.fld
\
-lfile
./input/
$(PDB)
/derived/
$(PDB)
_ligand.pdbqt
\
-nrun
$(NRUN)
\
-ngen
$(NGEN)
\
-psize
$(POPSIZE)
\
-resnam
$(TESTNAME)
\
-gfpop
0
\
-lsmet
$(TESTLS)
ASTEX_PDB
:=
2bsm
ASTEX_NRUN
:=
10
ASTEX_POPSIZE
:=
10
ASTEX_TESTNAME
:=
test_astex
ASTEX_LS
:=
sw
astex
:
odock
$(BIN_DIR)
/
$(TARGET)
\
-ffile
./input_tsri/search-set-astex/
$(ASTEX_PDB)
/protein.maps.fld
\
-lfile
./input_tsri/search-set-astex/
$(ASTEX_PDB)
/flex-xray.pdbqt
\
-nrun
$(ASTEX_NRUN)
\
-psize
$(ASTEX_POPSIZE)
\
-resnam
$(ASTEX_TESTNAME)
\
-gfpop
1
\
-lsmet
$(ASTEX_LS)
# $(BIN_DIR)/$(TARGET) -ffile ./input_tsri/search-set-astex/$(ASTEX_PDB)/protein.maps.fld -lfile ./input_tsri/search-set-astex/$(ASTEX_PDB)/flex-xray.pdbqt -nrun $(ASTEX_NRUN) -psize $(ASTEX_POPSIZE) -resnam $(ASTEX_TESTNAME) -gfpop 1 | tee ./input_tsri/search-set-astex/intrapairs/$(ASTEX_PDB)_intrapair.txt
PDB
:=
3ce3
NRUN
:=
100
...
...
README.md
View file @
fbf5743a
...
...
@@ -98,6 +98,7 @@ By default the output log file is written in the current working folder. Example
| -lsrat | Local-search rate | 6 (%) |
| -trat | Tournament rate | 60 (%) |
| -resnam | Name for docking output log | _"docking"_ |
| -hsym | Handle symmetry in RMSD calc. | 1 |
For a complete list of available arguments and their default values, check:
[
getparameters.cpp
](
host/src/getparameters.cpp
)
.
...
...
common/calcenergy_basic.h
View file @
fbf5743a
...
...
@@ -27,19 +27,19 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include
"defines.h"
#define RLIST_ATOMID_MASK
0x000000FF
#define RLIST_RBONDID_MASK
0x0000FF00
#define RLIST_RBONDID_SHIFT 8
#define RLIST_FIRSTROT_MASK 0x00010000
#define RLIST_GENROT_MASK
0x00020000
#define RLIST_DUMMY_MASK
0x00040000
#define DEG_TO_RAD
0.0174533f
#define RLIST_ATOMID_MASK 0x000000FF
#define RLIST_RBONDID_MASK 0x0000FF00
#define RLIST_RBONDID_SHIFT
8
#define RLIST_FIRSTROT_MASK
0x00010000
#define RLIST_GENROT_MASK 0x00020000
#define RLIST_DUMMY_MASK 0x00040000
#define DEG_TO_RAD 0.0174533f
// LCG: linear congruential generator constants
#define RAND_A
1103515245u
#define RAND_C
12345u
#define RAND_A 1103515245u
#define RAND_C 12345u
// WARNING: it is supposed that unsigned int is 32 bit long
#define MAX_UINT
4294967296.0f
#define MAX_UINT 4294967296.0f
// Macro for capturing grid values
// Original
...
...
@@ -57,4 +57,49 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
cube[0][0][1]*weights[0][0][1] +cube[1][0][1]*weights[1][0][1] + \
cube[0][1][1]*weights[0][1][1] +cube[1][1][1]*weights[1][1][1])
// Constants for dielelectric term of the
// electrostatic component of the intramolecular energy/gradient
#define DIEL_A -8.5525f
#define DIEL_WAT 78.4f
#define DIEL_B (DIEL_WAT - DIEL_A)
#define DIEL_LAMBDA 0.003627f
#define DIEL_H DIEL_LAMBDA
#define DIEL_K 7.7839f
#define DIEL_B_TIMES_H (DIEL_B * DIEL_H)
#define DIEL_B_TIMES_H_TIMES_K (DIEL_B_TIMES_H * DIEL_K)
// Used for Shoemake to quaternion transformation
#define PI_TIMES_2 (float)(2.0f*M_PI)
#define PI_FLOAT (float)(M_PI)
// -------------------------------------------
// Gradient-related defines
// -------------------------------------------
#define INFINITESIMAL_RADIAN 1E-3
#define HALF_INFINITESIMAL_RADIAN (0.5f * INFINITESIMAL_RADIAN)
#define INV_INFINITESIMAL_RADIAN (1/INFINITESIMAL_RADIAN)
#define COS_HALF_INFINITESIMAL_RADIAN cos(HALF_INFINITESIMAL_RADIAN)
#define SIN_HALF_INFINITESIMAL_RADIAN sin(HALF_INFINITESIMAL_RADIAN)
/*
#define TRANGENE_ALPHA 1E-3
#define ROTAGENE_ALPHA 1E-8
#define TORSGENE_ALPHA 1E-13
*/
#define STEP_INCREASE 1.2f
#define STEP_DECREASE 0.2f
#define STEP_START 1E3 // Starting step size. This might look gigantic but will cap
#define MAX_DEV_TRANSLATION 2.0f // 2 Angstrom, but must be divided by the gridspacing (store in variable)
//#define MAX_DEV_ROTATION 0.2f // Shoemake range [0, 1]
#define MAX_DEV_ROTATION 0.5f/DEG_TO_RAD // 0.5f RAD
#define MAX_DEV_TORSION 0.5f/DEG_TO_RAD // 0.5f RAD
#endif
/* CALCENERGY_BASIC_H_ */
common/defines.h
View file @
fbf5743a
...
...
@@ -39,7 +39,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define NUM_OF_THREADS_PER_BLOCK 64
#endif
#define ATYPE_NUM 22
#define ATYPE_NUM 22 // initial: 22
#define ATYPE_GETBONDS 16 // initial: 16
#define MAX_NUM_OF_ATOMS 256
#define MAX_NUM_OF_ATYPES 14
#define MAX_NUM_OF_ROTBONDS 32
...
...
@@ -47,19 +48,27 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define MAX_NUM_OF_ROTATIONS (MAX_NUM_OF_ATOMS * MAX_NUM_OF_ROTBONDS)
#define MAX_POPSIZE 2048
#define MAX_NUM_OF_RUNS 1000
#define MAX_NUM_GRIDPOINTS 256
// Must be bigger than MAX_NUM_OF_ROTBONDS+6
#define GENOTYPE_LENGTH_IN_GLOBMEM 64
#define ACTUAL_GENOTYPE_LENGTH (MAX_NUM_OF_ROTBONDS+6)
#define LS_EXP_FACTOR
2.0f
#define LS_CONT_FACTOR
0.5f
#define LS_EXP_FACTOR 2.0f
#define LS_CONT_FACTOR 0.5f
// Improvements over Pechan's implementation
#define NATIVE_PRECISION
#define ASYNC_COPY
#define IMPROVE_GRID
#define RESTRICT_ARGS
#define MAPPED_COPY
// TODO: convert this into a program arg
//#define GRADIENT_ENABLED
#endif
/* DEFINES_H_ */
device/auxiliary_genetic.cl
View file @
fbf5743a
...
...
@@ -25,12 +25,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//
-------------------------------------------------------
//
//
-------------------------------------------------------
unsigned
int
gpu_rand
(
#
if
defined
(
RESTRICT_ARGS
)
__global
unsigned
int*
restrict
prng_states
#
else
__global
unsigned
int*
prng_states
#
endif
uint
gpu_rand
(
__global
uint*
restrict
prng_states
)
//The
GPU
device
function
generates
a
random
int
//with
a
linear
congruential
generator.
...
...
@@ -39,20 +35,20 @@ unsigned int gpu_rand(
//prng_states
(
thread
with
ID
tx
in
block
with
ID
bx
stores
its
state
in
prng_states[bx*NUM_OF_THREADS_PER_BLOCK+$
//The
random
number
generator
uses
the
gcc
linear
congruential
generator
constants.
{
unsigned
int
state
;
u
int
state
;
#
if
defined
(
REPRO
)
state
=
1
;
#
else
//
c
urrent
state
of
the
threads
own
PRNG
//state
=
prng_states[get_group_id
(
0
)
*NUM_OF_THREADS_PER_BLOCK
+
get_local_id
(
0
)
]
;
//
C
urrent
state
of
the
threads
own
PRNG
//
state
=
prng_states[get_group_id
(
0
)
*NUM_OF_THREADS_PER_BLOCK
+
get_local_id
(
0
)
]
;
state
=
prng_states[get_global_id
(
0
)
]
;
//c
alculating
next
state
state
=
(
RAND_A*state+RAND_C
)
;
//
C
alculating
next
state
state
=
(
RAND_A*state+RAND_C
)
;
#
endif
//
s
aving
next
state
to
memory
//prng_states[get_group_id
(
0
)
*NUM_OF_THREADS_PER_BLOCK
+
get_local_id
(
0
)
]
=
state
;
//
S
aving
next
state
to
memory
//
prng_states[get_group_id
(
0
)
*NUM_OF_THREADS_PER_BLOCK
+
get_local_id
(
0
)
]
=
state
;
prng_states[get_global_id
(
0
)
]
=
state
;
return
state
;
...
...
@@ -62,29 +58,19 @@ unsigned int gpu_rand(
//
//
-------------------------------------------------------
float
gpu_randf
(
#
if
defined
(
RESTRICT_ARGS
)
__global
unsigned
int*
restrict
prng_states
#
else
__global
unsigned
int*
prng_states
#
endif
__global
uint*
restrict
prng_states
)
//The
GPU
device
function
generates
a
//random
float
greater
than
(
or
equal
to
)
0
and
less
than
1.
//It
uses
gpu_rand
()
function.
{
float
state
;
float
state
;
//
s
tate
will
be
between
0
and
1
//
S
tate
will
be
between
0
and
1
#
if
defined
(
REPRO
)
state
=
0.55f
; //0.55f;
#
else
#
if
defined
(
NATIVE_PRECISION
)
state
=
native_divide
(
gpu_rand
(
prng_states
)
,
MAX_UINT
)
*0.999999f
;
#
elif
defined
(
HALF_PRECISION
)
state
=
half_divide
(
gpu_rand
(
prng_states
)
,
MAX_UINT
)
*0.999999f
;
#
else
//
Full
precision
state
=
(((
float
)
gpu_rand
(
prng_states
))
/MAX_UINT
)
*0.999999f
;
#
endif
#
endif
return
state
;
...
...
@@ -94,50 +80,33 @@ float gpu_randf(
//
//
-------------------------------------------------------
void
map_angle
(
__local
float*
angle
)
//The
GPU
device
function
maps
//the
input
parameter
to
the
interval
0...360
//
(
supposing
that
it
is
an
angle
)
.
//
The
GPU
device
function
maps
//
the
input
parameter
to
the
interval
0...360
//
(
supposing
that
it
is
an
angle
)
.
{
while
(
*angle
>=
360.0f
)
*angle
-=
360.0f
;
while
(
*angle
>=
360.0f
)
{
*angle
-=
360.0f
;
}
while
(
*angle
<
0.0f
)
*angle
+=
360.0f
;
while
(
*angle
<
0.0f
)
{
*angle
+=
360.0f
;
}
}
//
-------------------------------------------------------
//
//
-------------------------------------------------------
void
gpu_perform_elitist_selection
(
int
dockpars_pop_size,
#
if
defined
(
RESTRICT_ARGS
)
void
gpu_perform_elitist_selection
(
int
dockpars_pop_size,
__global
float*
restrict
dockpars_energies_current,
__global
float*
restrict
dockpars_energies_next,
__global
int*
restrict
dockpars_evals_of_new_entities,
#
else
__global
float*
dockpars_energies_current,
__global
float*
dockpars_energies_next,
__global
int*
dockpars_evals_of_new_entities,
#
endif
int
dockpars_num_of_genes,
#
if
defined
(
RESTRICT_ARGS
)
__global
float*
restrict
dockpars_conformations_next,
__global
const
float*
restrict
dockpars_conformations_current
#
else
__global
float*
dockpars_conformations_next,
__global
const
float*
dockpars_conformations_current
#
endif
,
//
Some
OpenCL
compilers
don
't
allow
local
var
outside
kernels
//
so
this
local
vars
are
passed
from
a
kernel
__local
float*
best_energies,
__local
int*
best_IDs,
__local
int*
best_ID
__global
const
float*
restrict
dockpars_conformations_current,
__local
float*
best_energies,
__local
int*
best_IDs,
__local
int*
best_ID
)
//The
GPU
device
function
performs
elitist
selection,
//that
is,
it
looks
for
the
best
entity
in
conformations_current
and
...
...
@@ -145,68 +114,58 @@ void gpu_perform_elitist_selection(int dockpars_pop_size,
//and
copies
it
to
the
place
of
the
first
entity
in
//conformations_next
and
energies_next.
{
int
entity_counter
;
int
gene_counter
;
float
best_energy
;
//
Some
OpenCL
compilers
don
't
allow
local
var
outside
kernels
//
so
this
local
vars
are
passed
from
a
kernel
//__local
float
best_energies[NUM_OF_THREADS_PER_BLOCK]
;
//__local
int
best_IDs[NUM_OF_THREADS_PER_BLOCK]
;
//__local
int
best_ID
;
if
(
get_local_id
(
0
)
<
dockpars_pop_size
)
{
if
(
get_local_id
(
0
)
<
dockpars_pop_size
)
{
best_energies[get_local_id
(
0
)
]
=
dockpars_energies_current[get_group_id
(
0
)
+get_local_id
(
0
)
]
;
best_IDs[get_local_id
(
0
)
]
=
get_local_id
(
0
)
;
}
for
(
entity_counter
=
NUM_OF_THREADS_PER_BLOCK+get_local_id
(
0
)
;
entity_counter
<
dockpars_pop_size
;
entity_counter+=NUM_OF_THREADS_PER_BLOCK
)
for
(
entity_counter
=
NUM_OF_THREADS_PER_BLOCK+get_local_id
(
0
)
;
entity_counter
<
dockpars_pop_size
;
entity_counter+=
NUM_OF_THREADS_PER_BLOCK
)
{
if
(
dockpars_energies_current[get_group_id
(
0
)
+entity_counter]
<
best_energies[get_local_id
(
0
)
]
)
{
if
(
dockpars_energies_current[get_group_id
(
0
)
+entity_counter]
<
best_energies[get_local_id
(
0
)
]
)
{
best_energies[get_local_id
(
0
)
]
=
dockpars_energies_current[get_group_id
(
0
)
+entity_counter]
;
best_IDs[get_local_id
(
0
)
]
=
entity_counter
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
t
his
could
be
implemented
with
a
tree-like
structure
//which
may
be
slightly
faster
//
T
his
could
be
implemented
with
a
tree-like
structure
//
which
may
be
slightly
faster
if
(
get_local_id
(
0
)
==
0
)
{
best_energy
=
best_energies[0]
;
//best_ID
=
best_IDs[0]
;
best_ID[0]
=
best_IDs[0]
;
for
(
entity_counter
=
1
;
entity_counter
<
NUM_OF_THREADS_PER_BLOCK
;
entity_counter++
)
for
(
entity_counter
=
1
;
entity_counter
<
NUM_OF_THREADS_PER_BLOCK
;
entity_counter++
)
{
if
((
best_energies[entity_counter]
<
best_energy
)
&&
(
entity_counter
<
dockpars_pop_size
))
{
if
((
best_energies[entity_counter]
<
best_energy
)
&&
(
entity_counter
<
dockpars_pop_size
))
{
best_energy
=
best_energies[entity_counter]
;
//best_ID
=
best_IDs[entity_counter]
;
best_ID[0]
=
best_IDs[entity_counter]
;
}
}
//
s
etting
energy
value
of
new
entity
//
S
etting
energy
value
of
new
entity
dockpars_energies_next[get_group_id
(
0
)
]
=
best_energy
;
//
0
evals
were
performed
for
entity
selected
with
elitism
(
since
it
was
copied
only
)
//
Zero
(
0
)
evals
were
performed
for
entity
selected
with
elitism
(
since
it
was
copied
only
)
dockpars_evals_of_new_entities[get_group_id
(
0
)
]
=
0
;
}
//
now
best_id
stores
the
id
of
the
best
entity
in
the
population,
//
c
opying
genotype
and
energy
value
to
the
first
entity
of
new
population
//
"
best_id
"
stores
the
id
of
the
best
entity
in
the
population,
//
C
opying
genotype
and
energy
value
to
the
first
entity
of
new
population
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
gene_counter
=
get_local_id
(
0
)
;
gene_counter
<
dockpars_num_of_genes
;
gene_counter+=NUM_OF_THREADS_PER_BLOCK
)
//
dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+gene_counter]
=
dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+
GENOTYPE_LENGTH_IN_GLOBMEM*best_ID+gene_counter]
;
dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+gene_counter]
=
dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+GENOTYPE_LENGTH_IN_GLOBMEM*best_ID[0]+gene_counter]
;
for
(
gene_counter
=
get_local_id
(
0
)
;
gene_counter
<
dockpars_num_of_genes
;
gene_counter+=
NUM_OF_THREADS_PER_BLOCK
)
{
dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+gene_counter]
=
dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
+
GENOTYPE_LENGTH_IN_GLOBMEM*best_ID
[0]
+gene_counter]
;
}
}
device/calcMergedEneGra.cl
0 → 100644
View file @
fbf5743a
This diff is collapsed.
Click to expand it.
device/calcenergy.cl
View file @
fbf5743a
This diff is collapsed.
Click to expand it.
device/calcgradient.cl
0 → 100644
View file @
fbf5743a
This diff is collapsed.
Click to expand it.
device/kernel1.cl
View file @
fbf5743a
...
...
@@ -21,68 +21,68 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
//#define
DEBUG_ENERGY_KERNEL1
__kernel
void
__attribute__
((
reqd_work_group_size
(
NUM_OF_THREADS_PER_BLOCK,1,1
)))
gpu_calc_initpop
(
char
dockpars_num_of_atoms,
gpu_calc_initpop
(
char
dockpars_num_of_atoms,
char
dockpars_num_of_atypes,
int
dockpars_num_of_intraE_contributors,
char
dockpars_gridsize_x,
char
dockpars_gridsize_y,
char
dockpars_gridsize_z,
//
g1
=
gridsize_x
uint
dockpars_gridsize_x_times_y,
//
g2
=
gridsize_x
*
gridsize_y
uint
dockpars_gridsize_x_times_y_times_z,
//
g3
=
gridsize_x
*
gridsize_y
*
gridsize_z
float
dockpars_grid_spacing,
#
if
defined
(
RESTRICT_ARGS
)
__global
const
float*
restrict
dockpars_fgrids,
//
cannot
be
allocated
in
__constant
(
too
large
)
#
else
__global
const
float*
dockpars_fgrids,
//
cannot
be
allocated
in
__constant
(
too
large
)
#
endif
__global
const
float*
restrict
dockpars_fgrids,
//
This
is
too
large
to
be
allocated
in
__constant
int
dockpars_rotbondlist_length,
float
dockpars_coeff_elec,
float
dockpars_coeff_desolv,
#
if
defined
(
RESTRICT_ARGS
)
__global
const
float*
restrict
dockpars_conformations_current,
__global
float*
restrict
dockpars_energies_current,
__global
int*
restrict
dockpars_evals_of_new_entities,
#
else
__global
const
float*
dockpars_conformations_current,
__global
float*
dockpars_energies_current,
__global
int*
dockpars_evals_of_new_entities,
#
endif
__global
const
float*
restrict
dockpars_conformations_current,
__global
float*
restrict
dockpars_energies_current,
__global
int*
restrict
dockpars_evals_of_new_entities,
int
dockpars_pop_size,
float
dockpars_qasp,
float
dockpars_smooth,
float
dockpars_smooth,
__constant
kernelconstant_interintra*
kerconst_interintra,
__global
const
kernelconstant_intracontrib*
kerconst_intracontrib,
__constant
kernelconstant_intra*
kerconst_intra,
__constant
kernelconstant_rotlist*
kerconst_rotlist,
__constant
kernelconstant_conform*
kerconst_conform
__constant
kernelconstant_interintra*
kerconst_interintra,
__global
const
kernelconstant_intracontrib*
kerconst_intracontrib,
__constant
kernelconstant_intra*
kerconst_intra,
__constant
kernelconstant_rotlist*
kerconst_rotlist,
__constant
kernelconstant_conform*
kerconst_conform
)
{
//
Some
OpenCL
compilers
don
't
allow
declaring
//
local
variables
within
non-kernel
functions.
//
These
local
variables
must
be
declared
in
a
kernel,
//
and
then
passed
to
non-kernel
functions.
__local
float
genotype[ACTUAL_GENOTYPE_LENGTH]
;
__local
float
energy
;
__local
int
run_id
;
//
Some
OpenCL
compilers
don
't
allow
local
var
outside
kernels
//
so
this
local
vars
are
passed
from
a
kernel
__local
float
calc_coords_x[MAX_NUM_OF_ATOMS]
;
__local
float
calc_coords_y[MAX_NUM_OF_ATOMS]
;
__local
float
calc_coords_z[MAX_NUM_OF_ATOMS]
;
__local
float
partial_energies[NUM_OF_THREADS_PER_BLOCK]
;
#
if
defined
(
DEBUG_ENERGY_KERNEL
)
__local
float
partial_interE[NUM_OF_THREADS_PER_BLOCK]
;
__local
float
partial_intraE[NUM_OF_THREADS_PER_BLOCK]
;
#
endif
//
Copying
genotype
from
global
memory
event_t
ev
=
async_work_group_copy
(
genotype,
dockpars_conformations_current
+
GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id
(
0
)
,
ACTUAL_GENOTYPE_LENGTH,
0
)
;
//
d
etermining
run
ID
//
D
etermining
run
-
ID
if
(
get_local_id
(
0
)
==
0
)
{
run_id
=
get_group_id
(
0
)
/
dockpars_pop_size
;
}
//
Asynchronous
copy
should
be
finished
by
here
wait_group_events
(
1
,
&ev
)
;
wait_group_events
(
1
,
&ev
)
;
//
Evaluating
initial
genotype
//
Evaluating
initial
genotype
s
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
=============================================================
...
...
@@ -91,6 +91,9 @@ gpu_calc_initpop( char dockpars_num_of_atoms,
dockpars_gridsize_x,
dockpars_gridsize_y,
dockpars_gridsize_z,
//
g1
=
gridsize_x
dockpars_gridsize_x_times_y,
//
g2
=
gridsize_x
*
gridsize_y
dockpars_gridsize_x_times_y_times_z,
//
g3
=
gridsize_x
*
gridsize_y
*
gridsize_z
dockpars_fgrids,
dockpars_num_of_atypes,
dockpars_num_of_intraE_contributors,
...
...
@@ -103,22 +106,35 @@ gpu_calc_initpop( char dockpars_num_of_atoms,
genotype,
&energy,
&run_id,
//
Some
OpenCL
compilers
don
't
allow
local
var
outside
kernels
//
so
this
local
vars
are
passed
from
a
kernel
//
Some
OpenCL
compilers
don
't
allow
declaring
//
local
variables
within
non-kernel
functions.
//
These
local
variables
must
be
declared
in
a
kernel,
//
and
then
passed
to
non-kernel
functions.
calc_coords_x,
calc_coords_y,
calc_coords_z,
partial_energies,
kerconst_interintra,
kerconst_intracontrib,
kerconst_intra,
kerconst_rotlist,
kerconst_conform
)
;
#
if
defined
(
DEBUG_ENERGY_KERNEL
)
partial_interE,
partial_intraE,
#
endif
#
if
0
false,
#
endif
kerconst_interintra,