Commit 8ecca915 authored by Leonardo Solis's avatar Leonardo Solis
Browse files

Initial commit

parents
# ===================
# C gitignore
# https://github.com/github/gitignore/blob/master/C.gitignore
# ===================
# Object files
*.o
*.ko
*.obj
*.elf
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
# ===================
# C++ gitignore
# https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
# ===================
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
# odock Makefile
#
# Builds the OpenCL docking host program from the sources under
# ./opencl_lvs/src and ./host/src.
#
# User-tunable knobs (set on the command line, e.g.
# `make DEVICE=CPU NWI=32 CONFIG=debug`):
#   DEVICE      CPU | GPU            (default: GPU)
#   NWI         16 | 32 | 64 | 128   (default: 16 for CPU, 64 for GPU)
#   CONFIG      release | debug      (default: release)
#   DOCK_DEBUG  YES | NO             (default: NO)
#   REPRO       YES | NO             (default: NO)
#
# The produced binary is named ocladock_<NWI>wi.

AMD_INCLUDE_PATH=/opt/AMDAPPSDK-3.0/include
AMD_LIBRARY_PATH=/opt/amdgpu-pro/lib/x86_64-linux-gnu

# Project directories
# opencl_lvs: wrapper for OpenCL APIs
COMMON_DIR=./common
OCL_INC_DIR=./opencl_lvs/inc
OCL_SRC_DIR=./opencl_lvs/src
HOST_INC_DIR=./host/inc
HOST_SRC_DIR=./host/src
KRNL_DIR=./device

# Host sources (expanded once, at parse time)
OCL_SRC:=$(wildcard $(OCL_SRC_DIR)/*.cpp)
HOST_SRC:=$(wildcard $(HOST_SRC_DIR)/*.cpp)
SRC=$(OCL_SRC) $(HOST_SRC)

IFLAGS=-I$(COMMON_DIR) -I$(OCL_INC_DIR) -I$(HOST_INC_DIR) -I$(AMD_INCLUDE_PATH)
LFLAGS=-L$(AMD_LIBRARY_PATH)
CFLAGS=$(IFLAGS) $(LFLAGS)

# Device sources
KRNL_MAIN=calcenergy.cl
KRNL_SRC=$(KRNL_DIR)/$(KRNL_MAIN)

# Kernel names
K1_NAME="gpu_calc_initpop"
K2_NAME="gpu_sum_evals"
K3_NAME="perform_LS"
K4_NAME="gpu_gen_and_eval_newpops"
K_NAMES=-DK1=$(K1_NAME) -DK2=$(K2_NAME) -DK3=$(K3_NAME) -DK4=$(K4_NAME)

# Kernel flags
KFLAGS=-DKRNL_SOURCE=$(KRNL_DIR)/$(KRNL_MAIN) -DKRNL_DIRECTORY=$(KRNL_DIR) $(K_NAMES)

TARGET := ocladock
# Evaluated before the _<NWI>wi suffix is appended, so `clean` removes
# every ocladock* binary regardless of the work-group size it was built for.
BIN := $(wildcard $(TARGET)*)

# ------------------------------------------------------
# Choose OpenCL device
# Valid values: CPU, GPU
DEVICE=GPU

DEV=
ifeq ($(DEVICE), CPU)
DEV =-DCPU_DEVICE
else ifeq ($(DEVICE), GPU)
DEV =-DGPU_DEVICE
endif

# Number of work-items (wi)
# Valid values: 16, 32, 64, 128
#
# The compiler flag lives in NWI_FLAG rather than being written back into
# NWI: a variable given on the command line (`make NWI=32`) overrides every
# assignment made in this file, so re-assigning NWI here would be ignored
# and the bare number would end up on the compiler command line.
NWI=
NWI_FLAG=
ifeq ($(NWI), 16)
NWI_FLAG=-DN16WI
TARGET:=$(TARGET)_16wi
else ifeq ($(NWI), 32)
NWI_FLAG=-DN32WI
TARGET:=$(TARGET)_32wi
else ifeq ($(NWI), 64)
NWI_FLAG=-DN64WI
TARGET:=$(TARGET)_64wi
else ifeq ($(NWI), 128)
NWI_FLAG=-DN128WI
TARGET:=$(TARGET)_128wi
else
# NWI unset or not a supported value: fall back to a per-device default
ifeq ($(DEVICE), CPU)
NWI_FLAG=-DN16WI
TARGET:=$(TARGET)_16wi
else ifeq ($(DEVICE), GPU)
NWI_FLAG=-DN64WI
TARGET:=$(TARGET)_64wi
endif
endif

# ------------------------------------------------------
# Configuration (Host)
# Valid values: release, debug
CONFIG=release

ifeq ($(CONFIG),debug)
OPT =-O0 -g3 -Wall
else ifeq ($(CONFIG),release)
OPT =-O3
else
OPT =
endif

# Host and Device Debug
DOCK_DEBUG=NO

# Reproduce results (remove randomness)
REPRO=NO

ifeq ($(DOCK_DEBUG),YES)
DD =-DDOCK_DEBUG
else
DD =
endif

ifeq ($(REPRO),YES)
REP =-DREPRO
else
REP =
endif

# ------------------------------------------------------
# None of these names is a file this Makefile creates (the binary is
# $(TARGET)), so declare them phony; a stray file called `clean` or
# `odock` must not silently disable the rule.
.PHONY: all odock clean

all: odock

odock: $(SRC)
	$(CXX) $(SRC) $(CFLAGS) -lOpenCL -o$(TARGET) $(DEV) $(NWI_FLAG) $(OPT) $(DD) $(REP) $(KFLAGS)

clean:
	rm -f $(BIN) initpop.txt
odock
# Old commands
# This will execute 1 run
./ofdock_amd -ffile ./input_data/1hvr_vegl.maps.fld -lfile ./input_data/1hvrl.pdbqt
# This will execute 10 runs
./ofdock_amd -ffile ./input_data/1hvr_vegl.maps.fld -lfile ./input_data/1hvrl.pdbqt -nrun 10
# Updated commands for open-source release
./odock_64wi -ffile ./input_data/1stp/derived/1stp_protein.maps.fld -lfile ./input_data/1stp/derived/1stp_ligand.pdbqt -nrun 10
./odock_64wi -ffile ./input_data/3ce3/derived/3ce3_protein.maps.fld -lfile ./input_data/3ce3/derived/3ce3_ligand.pdbqt -nrun 10
odock
getparameters.cpp -> contains default values of docking parameters:
//default values | | name of parameter flags
mypars->num_of_energy_evals = 2500000; | | -nev: number of energy evaluations
mypars->num_of_generations = 27000; | | -ngen: number of generations
mypars->abs_max_dmov = 6.0/(*spacing); |// +/-6A | -dmov: max delta movement during mutation
mypars->abs_max_dang = 90; |// +/- 90° | -dang: max delta angle during mutation
mypars->mutation_rate = 2; |// 2% | -mrat: mutation rate
mypars->crossover_rate = 80; |// 80% | -crat: crossover rate
mypars->lsearch_rate = 6; |// 6% | -lsrat: local search rate
// unsigned long num_of_ls | |
mypars->tournament_rate = 60; |// 60% | -trat: tournament rate
mypars->rho_lower_bound = 0.01; |// 0.01 | -rholb: rho lower bound
mypars->base_dmov_mul_sqrt3 = 2.0/(*spacing)*sqrt(3.0); |// 2 A | -lsmov: local search delta movement
mypars->base_dang_mul_sqrt3 = 75.0*sqrt(3.0); |// 75° | -lsang: local search delta angle
mypars->cons_limit = 4; |// 4 | -cslim: consecutive succ/failure limit
mypars->max_num_of_iters = 300; | | -lsit: max num it for local search
mypars->pop_size = 150; | | -psize: size of population
mypars->initpop_gen_or_loadfile = 0; | | -pload: load init pop from file instead
| | of generating a new one
mypars->gen_pdbs = 0; | | -npdb: num of pdb files to be generated
// char fldfile [128] | |
// char ligandfile [128] | |
// float ref_ori_angles [3] | |
mypars->num_of_runs = 1; | | -nrun: number of runs
mypars->reflig_en_reqired = 0; | | -rlige: energy of ref ligand required
// char unbound_model | |
// AD4_free_energy_coeffs coeffs |
mypars->handle_symmetry = 0; | | -hsym: handle molecular symmetry
| | during rmsd calculation
| |
mypars->gen_finalpop = 0; | | -gfpop: generate final population results
| | file
mypars->gen_best = 0; | | -gbest: generate best.pdbqt
strcpy(mypars->resname, "docking"); | | -resname: name the result file
mypars->qasp = 0.01097f; | | -modqp: use modified QASP
mypars->rmsd_tolerance = 2.0; |//2 Angström | -rmstol:rmsd tolerance for clustering
| |
| |
| | -ubmod: unbound model to be used
| |
#ifndef CALCENERGY_BASIC_H_
#define CALCENERGY_BASIC_H_
#include "defines.h"
// Bit fields of a 32-bit rotation-list entry. The bit positions below follow
// from the mask values; the field meanings are taken from the macro names
// (TODO confirm against the code that builds the rotation list).
//   bits 0-7  : atom ID
#define RLIST_ATOMID_MASK 0x000000FF
//   bits 8-15 : rotatable-bond ID (apply the mask, then shift right by RLIST_RBONDID_SHIFT)
#define RLIST_RBONDID_MASK 0x0000FF00
#define RLIST_RBONDID_SHIFT 8
//   bit 16    : "first rotation" flag
#define RLIST_FIRSTROT_MASK 0x00010000
//   bit 17    : "general rotation" flag
#define RLIST_GENROT_MASK 0x00020000
//   bit 18    : "dummy" flag
#define RLIST_DUMMY_MASK 0x00040000
// Multiplier converting degrees to radians (pi/180, rounded)
#define DEG_TO_RAD 0.0174533f
// LCG: linear congruential generator constants
// (multiplier and increment used as state = RAND_A*state + RAND_C)
#define RAND_A 1103515245u
#define RAND_C 12345u
// WARNING: it is supposed that unsigned int is 32 bit long
// The value is 2^32, i.e. one more than the largest 32-bit unsigned int.
#define MAX_UINT 4294967296.0f
// Macro for capturing grid values
// Reads (or, used as an lvalue, writes) the element at position (t,z,y,x)
// of a flat array laid out with x varying fastest:
//   index = gridsize_x*(y + gridsize_y*(z + gridsize_z*t)) + x
// Every argument and the whole expansion are parenthesized so that
// expression arguments (e.g. "nx+1") and surrounding operators cannot
// change the computed index.
// Original
#define GETGRIDVALUE(mempoi,gridsize_x,gridsize_y,gridsize_z,t,z,y,x) \
	(*((mempoi) + (gridsize_x)*((y) + (gridsize_y)*((z) + (gridsize_z)*(t))) + (x)))
// Optimization 1
// #define GETGRIDVALUE_OPT(mempoi,gridsize_x,gridsize_y,mul_tmp,z,y,x) *(mempoi + gridsize_x*(y + gridsize_y*(z + mul_tmp)) + x)
// Optimization 2
// Implemented directly in the kernel code: calcenergy_fourkernels_intel.cl
// Macro for trilinear interpolation
// Expands to the sum of the eight products cube[i][j][k]*weights[i][j][k]
// over i,j,k in {0,1}. Both arguments must be indexable as [2][2][2].
// The arguments are parenthesized so that an expression argument
// (e.g. "use_a ? cube_a : cube_b") is indexed as a whole.
#define TRILININTERPOL(cube, weights) ((cube)[0][0][0]*(weights)[0][0][0] + (cube)[1][0][0]*(weights)[1][0][0] + \
                                       (cube)[0][1][0]*(weights)[0][1][0] + (cube)[1][1][0]*(weights)[1][1][0] + \
                                       (cube)[0][0][1]*(weights)[0][0][1] + (cube)[1][0][1]*(weights)[1][0][1] + \
                                       (cube)[0][1][1]*(weights)[0][1][1] + (cube)[1][1][1]*(weights)[1][1][1])
#endif /* CALCENERGY_BASIC_H_ */
/*
* (C) 2013. Evopro Innovation Kft.
*
* defines.h
*
* Created on: 2009.05.29.
* Author: pechan.imre
*/
#ifndef DEFINES_H_
#define DEFINES_H_

// Work-group size. Chosen by whichever of N16WI / N32WI / N64WI / N128WI
// is defined at build time (first match wins); 64 when none is defined.
#if defined(N16WI)
	#define NUM_OF_THREADS_PER_BLOCK 16
#elif defined(N32WI)
	#define NUM_OF_THREADS_PER_BLOCK 32
#elif defined(N64WI)
	#define NUM_OF_THREADS_PER_BLOCK 64
#elif defined(N128WI)
	#define NUM_OF_THREADS_PER_BLOCK 128
#else
	#define NUM_OF_THREADS_PER_BLOCK 64
#endif

// Upper limits on problem size
#define MAX_NUM_OF_ATOMS        90
#define MAX_NUM_OF_ATYPES       14
#define MAX_INTRAE_CONTRIBUTORS 8128
#define MAX_NUM_OF_ROTATIONS    4096
#define MAX_NUM_OF_ROTBONDS     32
#define MAX_POPSIZE             2048
#define MAX_NUM_OF_RUNS         100

// Genotype storage: the stride used in global memory
// must be bigger than MAX_NUM_OF_ROTBONDS+6
#define GENOTYPE_LENGTH_IN_GLOBMEM 64
#define ACTUAL_GENOTYPE_LENGTH     (MAX_NUM_OF_ROTBONDS+6)

// Local-search expansion / contraction factors
#define LS_EXP_FACTOR  2.0f
#define LS_CONT_FACTOR 0.5f

// Improvements over Pechan's implementation
// (feature switches tested with "#if defined (...)" elsewhere)
#define NATIVE_PRECISION
#define ASYNC_COPY
#define IMPROVE_GRID
#define RESTRICT_ARGS
#define MAPPED_COPY

#endif /* DEFINES_H_ */
// -------------------------------------------------------
//
// -------------------------------------------------------
// Generates a random unsigned int with a linear congruential generator
// (next = RAND_A*current + RAND_C, using the gcc LCG constants).
// Every work-item owns one PRNG state, kept in global memory at
// prng_states[get_global_id(0)]; the new state is written back to the
// same slot and also returned.
// When REPRO is defined the stored state is ignored and the function
// always stores and returns 1.
unsigned int gpu_rand(
#if defined (RESTRICT_ARGS)
		__global unsigned int* restrict prng_states
#else
		__global unsigned int* prng_states
#endif
)
{
	// slot of this work-item's PRNG state
	const size_t slot = get_global_id(0);

#if defined (REPRO)
	const unsigned int next_state = 1;
#else
	// advance the stored state by one LCG step
	const unsigned int next_state = (RAND_A*prng_states[slot]+RAND_C);
#endif

	// persist the new state for the next call
	prng_states[slot] = next_state;

	return next_state;
}
// -------------------------------------------------------
//
// -------------------------------------------------------
// Generates a random float greater than (or equal to) 0 and less than 1
// by dividing the result of gpu_rand() by MAX_UINT and scaling by 0.999999f.
// The division flavour depends on the build: native_divide with
// NATIVE_PRECISION, half_divide with HALF_PRECISION, a plain float
// division otherwise.
// When REPRO is defined the function returns the constant 0.55f and does
// not call gpu_rand().
float gpu_randf(
#if defined (RESTRICT_ARGS)
		__global unsigned int* restrict prng_states
#else
		__global unsigned int* prng_states
#endif
)
{
#if defined (REPRO)
	return 0.55f;
#elif defined (NATIVE_PRECISION)
	return native_divide(gpu_rand(prng_states),MAX_UINT)*0.999999f;
#elif defined (HALF_PRECISION)
	return half_divide(gpu_rand(prng_states),MAX_UINT)*0.999999f;
#else	// Full precision
	return (((float) gpu_rand(prng_states))/MAX_UINT)*0.999999f;
#endif
}
// -------------------------------------------------------
//
// -------------------------------------------------------
// Maps the angle (in degrees) pointed to by "angle" to the interval
// 0...360 by repeatedly subtracting or adding 360; the result is stored
// back through the pointer.
void map_angle(__local float* angle)
{
	float wrapped = *angle;

	// bring values at or above a full turn down
	while (wrapped >= 360.0f) {
		wrapped -= 360.0f;
	}

	// bring negative values up
	while (wrapped < 0.0f) {
		wrapped += 360.0f;
	}

	*angle = wrapped;
}
// -------------------------------------------------------
//
// -------------------------------------------------------
// Elitist selection for one work-group.
// Parameters:
//   dockpars_pop_size                number of entities scanned
//   dockpars_energies_current        energies of the current population (read)
//   dockpars_energies_next           energies of the next population (written at [get_group_id(0)])
//   dockpars_evals_of_new_entities   evaluation counters (written at [get_group_id(0)])
//   dockpars_num_of_genes            number of genes copied per genotype
//   dockpars_conformations_next      genotypes of the next population (written)
//   dockpars_conformations_current   genotypes of the current population (read)
// Every work-item of the group must call this function: it contains
// work-group barriers.
// NOTE(review): all global indices are relative to get_group_id(0), so the
// caller presumably invokes this only from the group whose ID is the index
// of the first entity of a run - confirm against the calling kernel.
void gpu_perform_elitist_selection(int dockpars_pop_size,
#if defined (RESTRICT_ARGS)
__global float* restrict dockpars_energies_current,
__global float* restrict dockpars_energies_next,
__global int* restrict dockpars_evals_of_new_entities,
#else
__global float* dockpars_energies_current,
__global float* dockpars_energies_next,
__global int* dockpars_evals_of_new_entities,
#endif
int dockpars_num_of_genes,
#if defined (RESTRICT_ARGS)
__global float* restrict dockpars_conformations_next,
__global const float* restrict dockpars_conformations_current
#else
__global float* dockpars_conformations_next,
__global const float* dockpars_conformations_current
#endif
)
//The GPU device function performs elitist selection,
//that is, it looks for the best entity in conformations_current and
//energies_current of the run that corresponds to the block ID,
//and copies it to the place of the first entity in
//conformations_next and energies_next.
{
int entity_counter;
int gene_counter;
//per-work-item partial results: lowest energy seen so far and the entity it belongs to
__local float best_energies[NUM_OF_THREADS_PER_BLOCK];
__local int best_IDs[NUM_OF_THREADS_PER_BLOCK];
//private to each work-item; only work-item 0 assigns and reads it
float best_energy;
//shared so that every work-item can read the winner after the second barrier
__local int best_ID;
//phase 1: each work-item seeds its slot with the entity whose index equals its local ID
//(slots with local ID >= pop_size stay unset; they are excluded by the range check in phase 2)
if (get_local_id(0) < dockpars_pop_size)
{
best_energies[get_local_id(0)] = dockpars_energies_current[get_group_id(0)+get_local_id(0)];
best_IDs[get_local_id(0)] = get_local_id(0);
}
//...then scans the remaining entities in steps of NUM_OF_THREADS_PER_BLOCK,
//keeping the lowest energy it has seen
for (entity_counter=NUM_OF_THREADS_PER_BLOCK+get_local_id(0);
entity_counter<dockpars_pop_size;
entity_counter+=NUM_OF_THREADS_PER_BLOCK)
if (dockpars_energies_current[get_group_id(0)+entity_counter] < best_energies[get_local_id(0)])
{
best_energies[get_local_id(0)] = dockpars_energies_current[get_group_id(0)+entity_counter];
best_IDs[get_local_id(0)] = entity_counter;
}
//all partial results must be in local memory before work-item 0 reduces them
barrier(CLK_LOCAL_MEM_FENCE);
//phase 2: work-item 0 reduces the partial results sequentially
//this could be implemented with a tree-like structure
//which may be slightly faster
if (get_local_id(0) == 0)
{
best_energy = best_energies[0];
best_ID = best_IDs[0];
for (entity_counter=1;
entity_counter<NUM_OF_THREADS_PER_BLOCK;
entity_counter++)
//the range check skips slots that were never initialized (local ID >= pop_size)
if ((best_energies[entity_counter] < best_energy) && (entity_counter < dockpars_pop_size))
{
best_energy = best_energies[entity_counter];
best_ID = best_IDs[entity_counter];
}
//setting energy value of new entity
dockpars_energies_next[get_group_id(0)] = best_energy;
//0 evals were performed for entity selected with elitism (since it was copied only)
dockpars_evals_of_new_entities[get_group_id(0)] = 0;
}
//now best_id stores the id of the best entity in the population,
//copying genotype and energy value to the first entity of new population
//(the barrier makes best_ID, written by work-item 0, visible to the whole group)
barrier(CLK_LOCAL_MEM_FENCE);
//phase 3: the work-items copy the genes of the best entity cooperatively,
//each handling every NUM_OF_THREADS_PER_BLOCK-th gene
for (gene_counter=get_local_id(0);
gene_counter<dockpars_num_of_genes;
gene_counter+=NUM_OF_THREADS_PER_BLOCK)
dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+gene_counter] = dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0)+GENOTYPE_LENGTH_IN_GLOBMEM*best_ID+gene_counter];
}
This diff is collapsed.
// Kernel: evaluates the energy of one entity per work-group.
// The work-group with ID g loads the genotype stored at
// dockpars_conformations_current + GENOTYPE_LENGTH_IN_GLOBMEM*g, passes it
// to gpu_calc_energy() and stores the result in dockpars_energies_current[g];
// dockpars_evals_of_new_entities[g] is set to 1.
// The work-group size is fixed to NUM_OF_THREADS_PER_BLOCK x 1 x 1.
// All remaining arguments are forwarded unchanged to gpu_calc_energy(),
// which is defined elsewhere.
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
gpu_calc_initpop( char dockpars_num_of_atoms,
char dockpars_num_of_atypes,
int dockpars_num_of_intraE_contributors,
char dockpars_gridsize_x,
char dockpars_gridsize_y,
char dockpars_gridsize_z,
float dockpars_grid_spacing,
#if defined (RESTRICT_ARGS)
__global const float* restrict dockpars_fgrids, // cannot be allocated in __constant (too large)
#else
__global const float* dockpars_fgrids, // cannot be allocated in __constant (too large)
#endif
int dockpars_rotbondlist_length,
float dockpars_coeff_elec,
float dockpars_coeff_desolv,
#if defined (RESTRICT_ARGS)
__global const float* restrict dockpars_conformations_current,
__global float* restrict dockpars_energies_current,
__global int* restrict dockpars_evals_of_new_entities,
#else
__global const float* dockpars_conformations_current,
__global float* dockpars_energies_current,
__global int* dockpars_evals_of_new_entities,
#endif
int dockpars_pop_size,
float dockpars_qasp,
__constant float* atom_charges_const,
__constant char* atom_types_const,
__constant char* intraE_contributors_const,
__constant float* VWpars_AC_const,
__constant float* VWpars_BD_const,
__constant float* dspars_S_const,
__constant float* dspars_V_const,
__constant int* rotlist_const,
__constant float* ref_coords_x_const,
__constant float* ref_coords_y_const,
__constant float* ref_coords_z_const,
__constant float* rotbonds_moving_vectors_const,
__constant float* rotbonds_unit_vectors_const,
__constant float* ref_orientation_quats_const
){
// work-group-local copy of this group's genotype
__local float genotype[GENOTYPE_LENGTH_IN_GLOBMEM];
// result of gpu_calc_energy() and the run this entity belongs to
__local float energy;
__local int run_id;
// the whole group cooperatively copies GENOTYPE_LENGTH_IN_GLOBMEM floats
// from global to local memory, then waits for the copy to complete
event_t ev = async_work_group_copy(genotype,
dockpars_conformations_current + GENOTYPE_LENGTH_IN_GLOBMEM*get_group_id(0),
GENOTYPE_LENGTH_IN_GLOBMEM, 0);
wait_group_events(1,&ev);
//determining run ID
//(integer division: groups 0..pop_size-1 map to run 0, and so on)
// NOTE(review): there is no barrier between this write and the call below;
// presumably gpu_calc_energy() synchronizes before reading *run_id - confirm.
if (get_local_id(0) == 0)
run_id = get_group_id(0) / dockpars_pop_size;
// =============================================================
// WARNING: only energy of work-item=0 will be valid
gpu_calc_energy(dockpars_rotbondlist_length,
dockpars_num_of_atoms,
dockpars_gridsize_x,
dockpars_gridsize_y,
dockpars_gridsize_z,
dockpars_fgrids,
dockpars_num_of_atypes,
dockpars_num_of_intraE_contributors,
dockpars_grid_spacing,
dockpars_coeff_elec,
dockpars_qasp,
dockpars_coeff_desolv,
genotype,
&energy,
&run_id,
atom_charges_const,
atom_types_const,
intraE_contributors_const,
VWpars_AC_const,
VWpars_BD_const,
dspars_S_const,
dspars_V_const,
rotlist_const,
ref_coords_x_const,
ref_coords_y_const,
ref_coords_z_const,
rotbonds_moving_vectors_const,
rotbonds_unit_vectors_const,
ref_orientation_quats_const);
// =============================================================
// work-item 0 publishes the result: the energy of this entity and
// an evaluation count of 1
if (get_local_id(0) == 0) {
dockpars_energies_current[get_group_id(0)] = energy;
dockpars_evals_of_new_entities[get_group_id(0)] = 1;
}
}
__kernel void __attribute__ ((reqd_work_group_size(NUM_OF_THREADS_PER_BLOCK,1,1)))
gpu_sum_evals(unsigned long pop_size,
/*unsigned long num_of_runs,*/
#if defined (RESTRICT_ARGS)
__global int* restrict dockpars_evals_of_new_entities,
__global int* restrict evals_of_runs
#else
__global int* dockpars_evals_of_new_entities,
__global int* evals_of_runs
#endif
)
//The GPU global function sums the evaluation counter states
//which are stored in evals_of_new_entities array foreach entity,
//calculates the sums for each run and stores it in evals_of_runs array.
//The number of blocks which should be started equals to num_of_runs,
//since each block performs the summation for one run.
{
int entity_counter;
int sum_evals;
__local int partsum_evals[NUM_OF_THREADS_PER_BLOCK];
partsum_evals[get_local_id(0)] = 0;
#if defined (ASYNC_COPY)
__local int local_evals_of_new_entities[MAX_POPSIZE]; // defined in defines.h
async_work_group_copy(local_evals_of_new_entities,