Commit b3bb1d46 authored by lvs's avatar lvs
Browse files

removed large local for intracontributors

parent 249cd107
......@@ -205,11 +205,11 @@ PDB := 3ce3
NRUN := 100
POPSIZE := 150
TESTNAME := test
TESTLS := sw
NUM_LSIT := 300
TESTLS := sd
NUM_LSIT := 100
test: odock
$(BIN_DIR)/$(TARGET) -ffile ./input/$(PDB)/derived/$(PDB)_protein.maps.fld -lfile ./input/$(PDB)/derived/$(PDB)_ligand.pdbqt -nrun $(NRUN) -psize $(POPSIZE) -resnam $(TESTNAME) -gfpop 1 -lsmet $(TESTLS) -lsit $(NUM_LSIT)
$(BIN_DIR)/$(TARGET) -ffile ./input/$(PDB)/derived/$(PDB)_protein.maps.fld -lfile ./input/$(PDB)/derived/$(PDB)_ligand.pdbqt -nrun $(NRUN) -psize $(POPSIZE) -resnam $(TESTNAME) -gfpop 1 -lsmet $(TESTLS) -lsit $(NUM_LSIT) -smooth 0.5
ASTEX_PDB := 2bsm
ASTEX_NRUN:= 10
......
......@@ -42,8 +42,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#define MAX_NUM_OF_ATOMS 256
#define MAX_NUM_OF_ATYPES 14
#define MAX_NUM_OF_ROTBONDS 32
#define MAX_INTRAE_CONTRIBUTORS 4096 // ideally =MAX_NUM_OF_ATOMS * MAX_NUM_OF_ATOMS,
// but too large for __local in gradient calculation
#define MAX_INTRAE_CONTRIBUTORS 4096 // Ideally =MAX_NUM_OF_ATOMS * MAX_NUM_OF_ATOMS,
// but too large for __local in gradient calculation.
// If calculated as #rotbons * #atoms = 32 * 256 = 8192,
// then error CL_OUT_OF_RESOURCES.
#define MAX_NUM_OF_ROTATIONS (MAX_NUM_OF_ATOMS * MAX_NUM_OF_ROTBONDS)
#define MAX_POPSIZE 2048
#define MAX_NUM_OF_RUNS 1000
......
......@@ -40,6 +40,41 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//#define DEBUG_GRAD_TORSION_GENES
//#define DEBUG_ENERGY_KERNEL5
///*
void atomicAdd_g_f(volatile __local float *addr, float val)
{
union{
unsigned int u32;
float f32;
} next, expected, current;
current.f32 = *addr;
do{
expected.f32 = current.f32;
next.f32 = expected.f32 + val;
current.u32 = atomic_cmpxchg( (volatile __local unsigned int *)addr, expected.u32, next.u32);
} while( current.u32 != expected.u32 );
}
void atomicSub_g_f(volatile __local float *addr, float val)
{
union{
unsigned int u32;
float f32;
} next, expected, current;
current.f32 = *addr;
do{
expected.f32 = current.f32;
next.f32 = expected.f32 - val;
current.u32 = atomic_cmpxchg( (volatile __local unsigned int *)addr, expected.u32, next.u32);
} while( current.u32 != expected.u32 );
}
//*/
void gpu_calc_gradient(
int dockpars_rotbondlist_length,
char dockpars_num_of_atoms,
......@@ -107,7 +142,12 @@ void gpu_calc_gradient(
__local float* gradient_x,
__local float* gradient_y,
__local float* gradient_z,
//----------------------------------
// eliminate unnecessary local storage
//----------------------------------
/*
__local float* gradient_per_intracontributor,
*/
__local float* gradient_genotype
)
{
......@@ -126,12 +166,17 @@ void gpu_calc_gradient(
gradient_intra_z[atom_id] = 0.0f;
}
//----------------------------------
// eliminate unnecessary local storage
//----------------------------------
/*
// Initializing gradients per intramolecular contributor pairs
for (uint intracontrib_atompair_id = get_local_id(0);
intracontrib_atompair_id < dockpars_num_of_intraE_contributors;
intracontrib_atompair_id+= NUM_OF_THREADS_PER_BLOCK) {
gradient_per_intracontributor[intracontrib_atompair_id] = 0.0f;
}
*/
// Initializing gradient genotypes
for (uint gene_cnt = get_local_id(0);
......@@ -564,6 +609,12 @@ void gpu_calc_gradient(
contributor_counter < dockpars_num_of_intraE_contributors;
contributor_counter+= NUM_OF_THREADS_PER_BLOCK)
{
//----------------------------------
// eliminate unnecessary local storage
//----------------------------------
float priv_gradient_per_intracontributor= 0.0f;
//----------------------------------
// Getting atom IDs
uint atom1_id = intraE_contributors_const[3*contributor_counter];
uint atom2_id = intraE_contributors_const[3*contributor_counter+1];
......@@ -579,7 +630,8 @@ void gpu_calc_gradient(
float subz = calc_coords_z[atom1_id] - calc_coords_z[atom2_id];
// Calculating atomic distance
float atomic_distance = native_sqrt(subx*subx + suby*suby + subz*subz)*dockpars_grid_spacing;
float dist = native_sqrt(subx*subx + suby*suby + subz*subz);
float atomic_distance = dist*dockpars_grid_spacing;
// Calculating gradient contributions
if (atomic_distance < 8.0f)
......@@ -635,18 +687,23 @@ void gpu_calc_gradient(
}
*/
//----------------------------------
// eliminate unnecessary local storage
//----------------------------------
// Calculating van der Waals / hydrogen bond term
gradient_per_intracontributor[contributor_counter] += native_divide (-12*VWpars_AC_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],
/*gradient_per_intracontributor[contributor_counter]*/ priv_gradient_per_intracontributor += native_divide (-12*VWpars_AC_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],
native_powr(smoothed_distance/*atomic_distance*/, 13)
);
if (intraE_contributors_const[3*contributor_counter+2] == 1) { //H-bond
gradient_per_intracontributor[contributor_counter] += native_divide (10*VWpars_BD_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],
/*gradient_per_intracontributor[contributor_counter]*/ priv_gradient_per_intracontributor += native_divide (10*VWpars_BD_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],
native_powr(smoothed_distance/*atomic_distance*/, 11)
);
}
else { //van der Waals
gradient_per_intracontributor[contributor_counter] += native_divide (6*VWpars_BD_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],
/*gradient_per_intracontributor[contributor_counter]*/ priv_gradient_per_intracontributor += native_divide (6*VWpars_BD_const[atom1_typeid * dockpars_num_of_atypes+atom2_typeid],
native_powr(smoothed_distance/*atomic_distance*/, 7)
);
}
......@@ -657,19 +714,94 @@ void gpu_calc_gradient(
float lower = native_powr(atomic_distance, 2) * native_powr(DIEL_A * (native_exp(DIEL_B_TIMES_H*atomic_distance) + DIEL_K) + DIEL_B * native_exp(DIEL_B_TIMES_H*atomic_distance), 2);
gradient_per_intracontributor[contributor_counter] += -dockpars_coeff_elec * atom_charges_const[atom1_id] * atom_charges_const[atom2_id] * native_divide (upper, lower);
/*gradient_per_intracontributor[contributor_counter]*/ priv_gradient_per_intracontributor += -dockpars_coeff_elec * atom_charges_const[atom1_id] * atom_charges_const[atom2_id] * native_divide (upper, lower);
// Calculating desolvation term
gradient_per_intracontributor[contributor_counter] += (
/*gradient_per_intracontributor[contributor_counter]*/ priv_gradient_per_intracontributor += (
(dspars_S_const[atom1_typeid] + dockpars_qasp*fabs(atom_charges_const[atom1_id])) * dspars_V_const[atom2_typeid] +
(dspars_S_const[atom2_typeid] + dockpars_qasp*fabs(atom_charges_const[atom2_id])) * dspars_V_const[atom1_typeid]
) *
dockpars_coeff_desolv * -0.07716049382716049 * atomic_distance * native_exp(-0.038580246913580245*native_powr(atomic_distance, 2));
//----------------------------------
//----------------------------------
// eliminate unnecessary local storage
//----------------------------------
// Accumulating gradients from "priv_gradient_per_intracontributor" for each each
float subx_div_dist = native_divide(subx, dist);
float suby_div_dist = native_divide(suby, dist);
float subz_div_dist = native_divide(subz, dist);
float priv_intra_gradient_x = priv_gradient_per_intracontributor * subx_div_dist;
float priv_intra_gradient_y = priv_gradient_per_intracontributor * suby_div_dist;
float priv_intra_gradient_z = priv_gradient_per_intracontributor * subz_div_dist;
/*
barrier(CLK_LOCAL_MEM_FENCE);
gradient_intra_x[atom1_id] -= priv_intra_gradient_x;
gradient_intra_y[atom1_id] -= priv_intra_gradient_y;
gradient_intra_z[atom1_id] -= priv_intra_gradient_z;
barrier(CLK_LOCAL_MEM_FENCE);
gradient_intra_x[atom2_id] += priv_intra_gradient_x;
gradient_intra_y[atom2_id] += priv_intra_gradient_y;
gradient_intra_z[atom2_id] += priv_intra_gradient_z;
*/
#if 1
// Calculating gradients in xyz components.
// Gradients for both atoms in a single contributor pair
// have the same magnitude, but opposite directions
/*
gradient_intra_x[atom1_id] -= priv_intra_gradient_x;
gradient_intra_y[atom1_id] -= priv_intra_gradient_y;
gradient_intra_z[atom1_id] -= priv_intra_gradient_z;
*/
atomicSub_g_f(&gradient_intra_x[atom1_id], priv_intra_gradient_x);
atomicSub_g_f(&gradient_intra_y[atom1_id], priv_intra_gradient_y);
atomicSub_g_f(&gradient_intra_z[atom1_id], priv_intra_gradient_z);
/*
gradient_intra_x[atom2_id] += priv_intra_gradient_x;
gradient_intra_y[atom2_id] += priv_intra_gradient_y;
gradient_intra_z[atom2_id] += priv_intra_gradient_z;
*/
//atomic_add(&gradient_intra_x[atom2_id], priv_intra_gradient_x);
//atomic_add(&gradient_intra_y[atom2_id], priv_intra_gradient_y);
//atomic_add(&gradient_intra_z[atom2_id], priv_intra_gradient_z);
atomicAdd_g_f(&gradient_intra_x[atom2_id], priv_intra_gradient_x);
atomicAdd_g_f(&gradient_intra_y[atom2_id], priv_intra_gradient_y);
atomicAdd_g_f(&gradient_intra_z[atom2_id], priv_intra_gradient_z);
//----------------------------------
#endif
}
} // End contributor_counter for-loop (INTRAMOLECULAR ENERGY)
//----------------------------------
// eliminate unnecessary local storage
//----------------------------------
/*
barrier(CLK_LOCAL_MEM_FENCE);
// Accumulating gradients from "gradient_per_intracontributor" for each each
......@@ -707,7 +839,24 @@ void gpu_calc_gradient(
//printf("%-20s %-10u %-5u %-5u %-10.8f\n", "grad_intracontrib", contributor_counter, atom1_id, atom2_id, gradient_per_intracontributor[contributor_counter]);
}
}
*/
barrier(CLK_LOCAL_MEM_FENCE);
......
......@@ -154,7 +154,14 @@ gradient_minimizer(
__local float gradient_intra_x[MAX_NUM_OF_ATOMS];
__local float gradient_intra_y[MAX_NUM_OF_ATOMS];
__local float gradient_intra_z[MAX_NUM_OF_ATOMS];
//----------------------------------
// eliminate unnecessary local storage
//----------------------------------
/*
__local float gradient_per_intracontributor[MAX_INTRAE_CONTRIBUTORS];
*/
// Accummulated gradient per each ligand atom
__local float gradient_x[MAX_NUM_OF_ATOMS];
......@@ -465,7 +472,12 @@ gradient_minimizer(
gradient_x,
gradient_y,
gradient_z,
//----------------------------------
// eliminate unnecessary local storage
//----------------------------------
/*
gradient_per_intracontributor,
*/
gradient
);
// =============================================================
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment