Commit 69f8fedf authored by Leonardo Solis
Browse files

SPEEDUP: reduced scope+removed __local caches

Former-commit-id: 53610648
parent 08197fa4
......@@ -5,7 +5,7 @@
// --------------------------------------------------------------------------
__kernel __attribute__ ((max_global_work_dim(0)))
void Krnl_Conform(
/*__constant*/ __global const int* restrict KerConstStatic_rotlist_const,
__global const int* restrict KerConstStatic_rotlist_const,
#if defined (FIXED_POINT_CONFORM)
__constant fixedpt3* restrict KerConstStatic_ref_coords_const, // must be formatted in host
__constant fixedpt3* restrict KerConstStatic_rotbonds_moving_vectors_const, // must be formatted in host
......@@ -18,23 +18,12 @@ void Krnl_Conform(
unsigned int DockConst_rotbondlist_length,
unsigned char DockConst_num_of_atoms,
unsigned char DockConst_num_of_genes,
/*
unsigned char Host_num_of_rotbonds,
*/
#if defined (FIXED_POINT_CONFORM)
/*
fixedpt ref_orientation_quats_const_0, // must be formatted in host
fixedpt ref_orientation_quats_const_1, // must be formatted in host
fixedpt ref_orientation_quats_const_2, // must be formatted in host
fixedpt ref_orientation_quats_const_3 // must be formatted in host
*/
__constant fixedpt4* restrict KerConstStatic_ref_orientation_quats_const,
#else
/*
float ref_orientation_quats_const_0,
float ref_orientation_quats_const_1,
float ref_orientation_quats_const_2,
float ref_orientation_quats_const_3
*/
__constant float4* restrict KerConstStatic_ref_orientation_quats_const,
#endif
unsigned short Host_RunId
......@@ -55,11 +44,13 @@ void Krnl_Conform(
__local float __attribute__((numbanks(8), bankwidth(16))) loc_coords[MAX_NUM_OF_ATOMS][4];
*/
/*
#if defined (FIXED_POINT_CONFORM)
__local fixedpt genotype[ACTUAL_GENOTYPE_LENGTH];
__local fixedpt genotype [ACTUAL_GENOTYPE_LENGTH];
#else
__local float genotype[ACTUAL_GENOTYPE_LENGTH];
__local float genotype [ACTUAL_GENOTYPE_LENGTH];
#endif
*/
/*
bool active = true;
......@@ -71,6 +62,7 @@ void Krnl_Conform(
rotlist_localcache [c] = KerConstStatic_rotlist_const [c];
}
/*
#if defined (FIXED_POINT_CONFORM)
__local fixedpt3 ref_coords_localcache [MAX_NUM_OF_ATOMS];
#else
......@@ -79,20 +71,24 @@ void Krnl_Conform(
for (uchar c = 0; c < DockConst_num_of_atoms; c++) {
ref_coords_localcache [c] = KerConstStatic_ref_coords_const [c];
}
*/
/*
#if defined (FIXED_POINT_CONFORM)
__local fixedpt3 rotbonds_moving_vectors_localcache[MAX_NUM_OF_ROTBONDS];
__local fixedpt3 rotbonds_unit_vectors_localcache[MAX_NUM_OF_ROTBONDS];
__local fixedpt3 rotbonds_moving_vectors_localcache [MAX_NUM_OF_ROTBONDS];
__local fixedpt3 rotbonds_unit_vectors_localcache [MAX_NUM_OF_ROTBONDS];
#else
__local float3 rotbonds_moving_vectors_localcache[MAX_NUM_OF_ROTBONDS];
__local float3 rotbonds_unit_vectors_localcache[MAX_NUM_OF_ROTBONDS];
__local float3 rotbonds_moving_vectors_localcache [MAX_NUM_OF_ROTBONDS];
__local float3 rotbonds_unit_vectors_localcache [MAX_NUM_OF_ROTBONDS];
#endif
for (uchar c = 0; c < Host_num_of_rotbonds; c++) {
rotbonds_moving_vectors_localcache [c] = KerConstStatic_rotbonds_moving_vectors_const[c];
rotbonds_unit_vectors_localcache [c] = KerConstStatic_rotbonds_unit_vectors_const [c];
}
*/
#if defined (FIXED_POINT_CONFORM)
/*
#if defined (FIXED_POINT_CONFORM)
fixedpt4 ref_orientation_quats_const = KerConstStatic_ref_orientation_quats_const[Host_RunId];
fixedpt ref_orientation_quats_const_0 = ref_orientation_quats_const.x;
fixedpt ref_orientation_quats_const_1 = ref_orientation_quats_const.y;
......@@ -105,6 +101,7 @@ void Krnl_Conform(
float ref_orientation_quats_const_2 = ref_orientation_quats_const.z;
float ref_orientation_quats_const_3 = ref_orientation_quats_const.w;
#endif
*/
#pragma max_concurrency 32
while(active) {
......@@ -125,7 +122,7 @@ while(active) {
singlepump,
numreadports(3),
numwriteports(1)
)) loc_coords[MAX_NUM_OF_ATOMS];
)) loc_coords [MAX_NUM_OF_ATOMS];
#else
float phi;
float theta;
......@@ -141,7 +138,7 @@ while(active) {
singlepump,
numreadports(3),
numwriteports(1)
)) loc_coords[MAX_NUM_OF_ATOMS];
)) loc_coords [MAX_NUM_OF_ATOMS];
#endif
/*
......@@ -152,16 +149,15 @@ while(active) {
mem_fence(CLK_CHANNEL_MEM_FENCE);
active = actmode.x;
mode = actmode.y;
mode = actmode.y;
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
if (i == 0) {
mode = read_channel_altera(chan_IGL2Conform_mode);
mem_fence(CLK_CHANNEL_MEM_FENCE);
}
*/
#if defined (FIXED_POINT_CONFORM)
fixedpt genotype [ACTUAL_GENOTYPE_LENGTH];
#else
float genotype [ACTUAL_GENOTYPE_LENGTH];
#endif
for (uchar i=0; i<DockConst_num_of_genes; i++) {
float fl_tmp = read_channel_altera(chan_IGL2Conform_genotype);
#if defined (FIXED_POINT_CONFORM)
// convert float to fixedpt
......@@ -223,7 +219,10 @@ while(active) {
if ((rotation_list_element & RLIST_FIRSTROT_MASK) != 0) //if first rotation of this atom
{
/*
atom_to_rotate = ref_coords_localcache [atom_id];
*/
atom_to_rotate = KerConstStatic_ref_coords_const [atom_id];
}
else
{
......@@ -252,11 +251,17 @@ while(active) {
else //if rotating around rotatable bond
{
uint rotbond_id = (rotation_list_element & RLIST_RBONDID_MASK) >> RLIST_RBONDID_SHIFT;
/*
rotation_unitvec = rotbonds_unit_vectors_localcache [rotbond_id];
*/
rotation_unitvec = KerConstStatic_rotbonds_unit_vectors_const [rotbond_id];
rotation_angle = genotype [6+rotbond_id];
/*
rotation_movingvec = rotbonds_moving_vectors_localcache [rotbond_id];
*/
rotation_movingvec = KerConstStatic_rotbonds_moving_vectors_const [rotbond_id];
//in addition performing the first movement
//which is needed only if rotating around rotatable bond
......@@ -307,6 +312,21 @@ while(active) {
//two rotations should be performed
//(multiplying the quaternions)
{
#if defined (FIXED_POINT_CONFORM)
const fixedpt4 ref_orientation_quats_const = KerConstStatic_ref_orientation_quats_const[Host_RunId];
const fixedpt ref_orientation_quats_const_0 = ref_orientation_quats_const.x;
const fixedpt ref_orientation_quats_const_1 = ref_orientation_quats_const.y;
const fixedpt ref_orientation_quats_const_2 = ref_orientation_quats_const.z;
const fixedpt ref_orientation_quats_const_3 = ref_orientation_quats_const.w;
#else
const float4 ref_orientation_quats_const = KerConstStatic_ref_orientation_quats_const[Host_RunId];
const float ref_orientation_quats_const_0 = ref_orientation_quats_const.x;
const float ref_orientation_quats_const_1 = ref_orientation_quats_const.y;
const float ref_orientation_quats_const_2 = ref_orientation_quats_const.z;
const float ref_orientation_quats_const_3 = ref_orientation_quats_const.w;
#endif
//calculating quatrot_left*ref_orientation_quats_const,
//which means that reference orientation rotation is the first
#if defined (FIXED_POINT_CONFORM)
......@@ -318,7 +338,7 @@ while(active) {
quatrot_temp = quatrot_left;
#endif
// L30nardoSV: taking the first element of ref_orientation_quats_const member
//taking the first element of ref_orientation_quats_const member
#if defined (FIXED_POINT_CONFORM)
quatrot_left_q = fixedpt_mul(quatrot_temp_q, ref_orientation_quats_const_0)
- fixedpt_mul(quatrot_temp_x, ref_orientation_quats_const_1)
......
......@@ -17,27 +17,24 @@ channel float chan_LS2Conf_LS2_genotype __attribute__((depth(ACTUAL_GENOT
channel float chan_LS2Conf_LS3_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
// IC, GG, LS1
// Earlier float3 declaration (depth MAX_NUM_OF_ATOMS), kept for reference:
/*channel float3 chan_Conf2Intere_xyz __attribute__((depth(MAX_NUM_OF_ATOMS)));*/
// float8 elements at half the depth of the float3 version above.
// NOTE(review): presumably two atoms are packed per element -- confirm against the writer side.
channel float8 chan_Conf2Intere_xyz __attribute__((depth(MAX_NUM_OF_ATOMS/2)));
// char2 control word with no explicit depth attribute.
// NOTE(review): presumably .x = active flag and .y = mode, matching how Krnl_Conform
// unpacks its own actmode value -- verify against the reader.
channel char2 chan_Conf2Intere_actmode;
// Earlier float3 declaration (depth MAX_NUM_OF_ATOMS), kept for reference:
/*channel float3 chan_Conf2Intrae_xyz __attribute__((depth(MAX_NUM_OF_ATOMS)));*/
// Same float8 / half-depth layout as chan_Conf2Intere_xyz.
channel float8 chan_Conf2Intrae_xyz __attribute__((depth(MAX_NUM_OF_ATOMS/2)));
// Same char2 control word as chan_Conf2Intere_actmode.
channel char2 chan_Conf2Intrae_actmode;
// Send data back to generators of genotypes
channel float chan_Intere2StoreIC_intere __attribute__((depth(/*20*/2)));
channel float chan_Intere2StoreGG_intere __attribute__((depth(/*20*/2)));
channel float chan_Intere2StoreLS_LS1_intere __attribute__((depth(/*20*/2))); // it requires 6% MAX_POPSIZE
channel float chan_Intere2StoreLS_LS2_intere __attribute__((depth(/*20*/2))); // it requires 6% MAX_POPSIZE
channel float chan_Intere2StoreLS_LS3_intere __attribute__((depth(/*20*/2))); // it requires 6% MAX_POPSIZE
channel float chan_Intrae2StoreIC_intrae __attribute__((depth(/*20*/2)));
channel float chan_Intrae2StoreGG_intrae __attribute__((depth(/*20*/2)));
channel float chan_Intrae2StoreLS_LS1_intrae __attribute__((depth(/*20*/2))); // it requires 6% MAX_POPSIZE
channel float chan_Intrae2StoreLS_LS2_intrae __attribute__((depth(/*20*/2))); // it requires 6% MAX_POPSIZE
channel float chan_Intrae2StoreLS_LS3_intrae __attribute__((depth(/*20*/2))); // it requires 6% MAX_POPSIZE
channel float chan_Intere2StoreIC_intere __attribute__((depth(2)));
channel float chan_Intere2StoreGG_intere __attribute__((depth(2)));
channel float chan_Intere2StoreLS_LS1_intere __attribute__((depth(2)));
channel float chan_Intere2StoreLS_LS2_intere __attribute__((depth(2)));
channel float chan_Intere2StoreLS_LS3_intere __attribute__((depth(2)));
channel float chan_Intrae2StoreIC_intrae __attribute__((depth(2)));
channel float chan_Intrae2StoreGG_intrae __attribute__((depth(2)));
channel float chan_Intrae2StoreLS_LS1_intrae __attribute__((depth(2)));
channel float chan_Intrae2StoreLS_LS2_intrae __attribute__((depth(2)));
channel float chan_Intrae2StoreLS_LS3_intrae __attribute__((depth(2)));
// PRNG kernels
channel float8 chan_PRNG2GA_BT_ushort_float_prng;
......@@ -45,41 +42,35 @@ channel float8 chan_PRNG2GA_BT_ushort_float_prng;
channel uchar2 chan_PRNG2GA_GG_uchar_prng;
channel float chan_PRNG2GA_GG_float_prng __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel ushort3 chan_PRNG2GA_LS123_ushort_prng;
channel ushort3 chan_PRNG2GA_LS123_ushort_prng;
channel float chan_PRNG2GA_LS_float_prng __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_PRNG2GA_LS2_float_prng __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_PRNG2GA_LS3_float_prng __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel bool chan_Arbiter_BT_ushort_float_off;
channel bool chan_Arbiter_GG_uchar_off;
channel bool chan_Arbiter_GG_float_off;
channel bool chan_Arbiter_LS123_ushort_off;
channel bool chan_Arbiter_LS_float_off;
channel bool chan_Arbiter_LS2_float_off;
channel bool chan_Arbiter_LS3_float_off;
channel bool chan_Arbiter_BT_ushort_float_off;
channel bool chan_Arbiter_GG_uchar_off;
channel bool chan_Arbiter_GG_float_off;
channel bool chan_Arbiter_LS123_ushort_off;
channel bool chan_Arbiter_LS_float_off;
channel bool chan_Arbiter_LS2_float_off;
channel bool chan_Arbiter_LS3_float_off;
channel bool chan_GA2PRNG_LS_float_Off;
channel bool chan_GA2PRNG_LS_float_Off;
// LS1, LS2, LS3
channel float chan_GA2LS_LS1_energy;
channel float chan_GA2LS_LS1_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_GA2LS_LS2_energy;
channel float chan_GA2LS_LS2_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_GA2LS_LS3_energy;
channel float chan_GA2LS_LS1_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_GA2LS_LS2_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_GA2LS_LS3_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float2 chan_LS2GA_LS1_evalenergy __attribute__((depth(/*8*/2)));
channel float2 chan_LS2GA_LS1_evalenergy __attribute__((depth(2)));
channel float2 chan_LS2GA_LS2_evalenergy __attribute__((depth(2)));
channel float2 chan_LS2GA_LS3_evalenergy __attribute__((depth(2)));
channel float chan_LS2GA_LS1_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float2 chan_LS2GA_LS2_evalenergy __attribute__((depth(/*8*/2)));
channel float chan_LS2GA_LS2_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float2 chan_LS2GA_LS3_evalenergy __attribute__((depth(/*8*/2)));
channel float chan_LS2GA_LS3_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel bool chan_GA2LS_Off1_active;
......@@ -88,18 +79,6 @@ channel bool chan_GA2LS_Off3_active;
channel bool chan_IGLArbiter_Off;
/*
// IA Pipeline
channel bool chan_Intrae2IA_active;
channel float3 chan_Intrae2IA_fpipe __attribute__((depth(4)));
channel float chan_IA2Intrae_intraE;
// P2
channel bool chan_Intrae2IA_P2_active;
channel float3 chan_Intrae2IA_P2_fpipe __attribute__((depth(4)));
channel float chan_IA2Intrae_P2_intraE;
*/
// Include the fixed-point definitions when any of the fixed-point build flags is set.
// Every flag is tested with `defined`, like the other FIXED_POINT_* checks in these kernels.
// A bare `(FIXED_POINT_LS1)` evaluates the macro's *value* instead: it is a preprocessor
// error when the flag is defined without a value, and is false when it is defined as 0.
#if defined (FIXED_POINT_CONFORM) || defined (FIXED_POINT_LS1) || defined (FIXED_POINT_LS2) || defined (FIXED_POINT_LS3)
#include "../defines_fixedpt.h"
......@@ -180,10 +159,8 @@ fixedpt fixedpt_map_angle_360(fixedpt angle)
// Originally from: searchoptimum.c
// --------------------------------------------------------------------------
__kernel __attribute__ ((max_global_work_dim(0)))
void Krnl_GA(__global float* restrict GlobPopulationCurrent,
__global float* restrict GlobEnergyCurrent,
void Krnl_GA(__global float* restrict GlobPopulationCurrent,
__global float* restrict GlobEnergyCurrent,
#if defined(SINGLE_COPY_POP_ENE)
__global unsigned int* restrict GlobEvals_performed,
__global unsigned int* restrict GlobGens_performed,
......@@ -276,10 +253,6 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
#endif
}
// ------------------------------------------------------------------
/*
__local float LocalPopNext[MAX_POPSIZE][ACTUAL_GENOTYPE_LENGTH];
__local float LocalEneNext[MAX_POPSIZE];
*/
uint eval_cnt = DockConst_pop_size; // takes into account the IC evals
uint ls_eval_cnt = 0;
......@@ -311,7 +284,7 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
numbanks(1),
bankwidth(64),
singlepump,
numreadports(6/*7*/),
numreadports(6),
numwriteports(1)
)) loc_energies[MAX_POPSIZE];
......@@ -349,15 +322,6 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
//}
}
#if 0
// elitism - copying the best entity to new population
//#pragma unroll 16
for (uchar gene_cnt=0; gene_cnt<DockConst_num_of_genes /*ACTUAL_GENOTYPE_LENGTH*/; gene_cnt++) {
LocalPopNext[0][gene_cnt & 0x3F] = LocalPopCurr[best_entity][gene_cnt & 0x3F];
}
LocalEneNext[0] = loc_energies[best_entity];
#endif
#pragma ivdep array (LocalPopNext)
#pragma ivdep array (LocalEneNext)
for (ushort new_pop_cnt = 1; new_pop_cnt < DockConst_pop_size; new_pop_cnt++) {
......@@ -535,8 +499,7 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
//for (ushort ls_ent_cnt=0; ls_ent_cnt<DockConst_num_of_lsentities; ls_ent_cnt++) {
for (ushort ls_ent_cnt=0; ls_ent_cnt<DockConst_num_of_lsentities; ls_ent_cnt+=3) {
// choose all random entities
// without checking if it has already been subjected to LS in this cycle
// choose random & different entities on every iteration
ushort3 entity_ls = read_channel_altera(chan_PRNG2GA_LS123_ushort_prng);
mem_fence(CLK_CHANNEL_MEM_FENCE);
......@@ -556,17 +519,6 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
}
mem_fence(CLK_CHANNEL_MEM_FENCE);
/*
float2 evalenergy_tmp1 = read_channel_altera(chan_LS2GA_LS1_evalenergy);
float2 evalenergy_tmp2 = read_channel_altera(chan_LS2GA_LS2_evalenergy);
float2 evalenergy_tmp3 = read_channel_altera(chan_LS2GA_LS3_evalenergy);
mem_fence(CLK_CHANNEL_MEM_FENCE);
#if defined (DEBUG_KRNL_LS)
printf("LS - got all eval & energies back\n");
#endif
*/
float2 evalenergy_tmp1;
float2 evalenergy_tmp2;
float2 evalenergy_tmp3;
......@@ -585,6 +537,9 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
}
}
#if defined (DEBUG_KRNL_LS)
printf("LS - got all eval & energies back\n");
#endif
float eetmp1 = evalenergy_tmp1.x;
float eetmp2 = evalenergy_tmp2.x;
......@@ -605,12 +560,6 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
LocalPopNext[entity_ls3][gene_cnt & MASK_GENOTYPE] = read_channel_altera(chan_LS2GA_LS3_genotype);
}
ls_eval_cnt += eval_tmp1 + eval_tmp2 + eval_tmp3;
#if defined (DEBUG_KRNL_LS)
......@@ -644,10 +593,10 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
// ------------------------------------------------------------------
// Off: turn off all other kernels
// ------------------------------------------------------------------
write_channel_altera(chan_GA2LS_Off1_active, false); // turn off LS_Arbiter, LS1
write_channel_altera(chan_GA2LS_Off2_active, false); // turn off LS2_Arbiter, LS2
write_channel_altera(chan_GA2LS_Off3_active, false); // turn off LS3_Arbiter, LS3
write_channel_altera(chan_Arbiter_BT_ushort_float_off, false);
write_channel_altera(chan_GA2LS_Off1_active, false); // turn off LS_Arbiter, LS1
write_channel_altera(chan_GA2LS_Off2_active, false); // turn off LS2_Arbiter, LS2
write_channel_altera(chan_GA2LS_Off3_active, false); // turn off LS3_Arbiter, LS3
write_channel_altera(chan_Arbiter_BT_ushort_float_off, false);
write_channel_altera(chan_Arbiter_GG_uchar_off, false);
write_channel_altera(chan_Arbiter_GG_float_off, false);
......@@ -657,7 +606,7 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
write_channel_altera(chan_Arbiter_LS2_float_off, false);
write_channel_altera(chan_Arbiter_LS3_float_off, false);
write_channel_altera(chan_IGLArbiter_Off, false); // turn off IGL_Arbiter, Conform, InterE, IntraE
write_channel_altera(chan_IGLArbiter_Off, false); // turn off IGL_Arbiter, Conform, InterE, IntraE
for (ushort pop_cnt=0;pop_cnt<DockConst_pop_size; pop_cnt++) {
......
......@@ -6,11 +6,7 @@ channel float chan_IGL2Conform_genotype __attribute__((depth(3*ACTUAL_GENOTY
// --------------------------------------------------------------------------
__kernel __attribute__ ((max_global_work_dim(0)))
void Krnl_IGL_Arbiter(unsigned char DockConst_num_of_genes) {
/*
__local float genotypeIC [ACTUAL_GENOTYPE_LENGTH];
__local float genotypeGG [ACTUAL_GENOTYPE_LENGTH];
__local float genotype [3][ACTUAL_GENOTYPE_LENGTH];
*/
char active = 0x01;
// Only for debugging
......@@ -54,10 +50,7 @@ while(active) {
uchar bound_tmp = 0;
active = Off_valid ? 0x00 : 0x01;
char mode [3]; // mode for all LS
/*
float genotypeIC [ACTUAL_GENOTYPE_LENGTH];
float genotypeGG [ACTUAL_GENOTYPE_LENGTH];
*/
float genotypeICGG [ACTUAL_GENOTYPE_LENGTH];
float genotype [3][ACTUAL_GENOTYPE_LENGTH];
......@@ -66,14 +59,11 @@ while(active) {
//#pragma ivdep
for (uchar i=0; i<DockConst_num_of_genes; i++) {
if (IC_valid == true) {
//printf("%-15s %5s\n", "IC_valid: ", "reading genotypes");
if (i == 0) {bound_tmp++; }
//genotypeIC [i] = read_channel_altera(chan_IC2Conf_genotype);
genotypeICGG [i] = read_channel_altera(chan_IC2Conf_genotype);
}
else if (GG_valid == true) {
if (i == 0) {bound_tmp++; }
//genotypeGG [i] = read_channel_altera(chan_GG2Conf_genotype);
genotypeICGG [i] = read_channel_altera(chan_GG2Conf_genotype);
}
else{
......@@ -159,7 +149,6 @@ while(active) {
for (uchar i=0; i<DockConst_num_of_genes; i++) {
//float gene_tmp = IC_valid? genotypeIC[i]: GG_valid? genotypeGG[i]: genotype[j][i & MASK_GENOTYPE];
float gene_tmp = (IC_valid || GG_valid)? genotypeICGG[i]: genotype[j][i & MASK_GENOTYPE];
if (i > 2) {
......
/*
#if defined (FIXED_POINT_INTERE)
#include "../defines_fixedpt_64.h"
#endif
*/
// --------------------------------------------------------------------------
// The function calculates the intermolecular energy of a ligand given by
// myligand parameter, and a receptor represented as a grid.
......@@ -17,20 +11,15 @@
// --------------------------------------------------------------------------
__kernel __attribute__ ((max_global_work_dim(0)))
void Krnl_InterE(
//#if defined (FIXED_POINT_INTERE)
#if 0
__constant fixedpt64* restrict GlobFgrids,
#else
/*__constant*/ __global const float* restrict GlobFgrids,
#endif
__global const float* restrict GlobFgrids,
#if defined (FIXED_POINT_INTERE)
__constant fixedpt64* restrict KerConstStatic_atom_charges_const,
#else
__constant float* restrict KerConstStatic_atom_charges_const,
__constant float* restrict KerConstStatic_atom_charges_const,
#endif
__constant char* restrict KerConstStatic_atom_types_const,
__constant char* restrict KerConstStatic_atom_types_const,
unsigned char DockConst_g1,
unsigned int DockConst_g2,
......@@ -58,30 +47,32 @@ void Krnl_InterE(
*/
)
{
// __local variables may only be declared at kernel-function scope.
// However, arrays can be declared inside loops and still be kept in local memory.
// The code below shows how that is done.
/*
bool active = true;
*/
char active = 0x01;
/*
__local char atom_types_localcache [MAX_NUM_OF_ATOMS];
*/
/*
#if defined (FIXED_POINT_INTERE)
__local fixedpt64 atom_charges_localcache [MAX_NUM_OF_ATOMS];
#else
__local float atom_charges_localcache [MAX_NUM_OF_ATOMS];
#endif
*/
/*
for (uchar i=0; i<DockConst_num_of_atoms; i++) {
atom_types_localcache [i] = KerConstStatic_atom_types_const [i];
atom_charges_localcache [i] = KerConstStatic_atom_charges_const [i];
}
*/
/*__constant*/ __global const float* GlobFgrids2 = & GlobFgrids [Host_mul_tmp2];
/*__constant*/ __global const float* GlobFgrids3 = & GlobFgrids [Host_mul_tmp3];
__global const float* GlobFgrids2 = & GlobFgrids [Host_mul_tmp2];
__global const float* GlobFgrids3 = & GlobFgrids [Host_mul_tmp3];
#pragma max_concurrency 32
while(active) {
......@@ -139,7 +130,10 @@ while(active) {
// for each atom
for (uchar atom1_id=0; atom1_id<DockConst_num_of_atoms; atom1_id++)
{
/*
char atom1_typeid = atom_types_localcache [atom1_id];
*/
char atom1_typeid = KerConstStatic_atom_types_const [atom1_id];
float3 loc_coords_atid1 = loc_coords[atom1_id];
......@@ -150,7 +144,10 @@ while(active) {
#if defined (FIXED_POINT_INTERE)
#else
/*
float q = atom_charges_localcache [atom1_id];
*/
float q = KerConstStatic_atom_charges_const [atom1_id];
#endif
#if defined (FIXED_POINT_INTERE)
......@@ -158,7 +155,10 @@ while(active) {
fixedpt64 fixpt_y = fixedpt64_fromfloat(loc_coords_atid1.y);
fixedpt64 fixpt_z = fixedpt64_fromfloat(loc_coords_atid1.z);
// fixedpt64 fixpt_q = fixedpt64_fromfloat(atom_charges_localcache [atom1_id]);
/*
fixedpt64 fixpt_q = atom_charges_localcache [atom1_id];
*/
fixedpt64 fixpt_q = KerConstStatic_atom_charges_const [atom1_id];
#endif
#if defined (FIXED_POINT_INTERE)
......@@ -499,25 +499,20 @@ while(active) {
#endif
switch (mode) {
case /*0x01*/ 'I': // IC
write_channel_altera(chan_Intere2StoreIC_intere, /*interE*/final_interE);
break;
// IC
case 'I': write_channel_altera(chan_Intere2StoreIC_intere, final_interE); break;
case /*0x02*/ 'G': // GG
write_channel_altera(chan_Intere2StoreGG_intere, /*interE*/final_interE);
break;
// GG
case 'G': write_channel_altera(chan_Intere2StoreGG_intere, final_interE); break;
case /*0x03*/ 0x01: // LS 1
write_channel_altera(chan_Intere2StoreLS_LS1_intere, /*interE*/final_interE);
break;
// LS 1
case 0x01: write_channel_altera(chan_Intere2StoreLS_LS1_intere, final_interE); break;
case 0x02: // LS 2
write_channel_altera(chan_Intere2StoreLS_LS2_intere, /*interE*/final_interE);
break;
// LS 2
case 0x02: write_channel_altera(chan_Intere2StoreLS_LS2_intere, final_interE); break;
case 0x03: // LS 3
write_channel_altera(chan_Intere2StoreLS_LS3_intere, /*interE*/final_interE);
break;
// LS 3
case 0x03: write_channel_altera(chan_Intere2StoreLS_LS3_intere, final_interE); break;
}
// --------------------------------------------------------------
......
......@@ -12,24 +12,14 @@ float sqrt_custom(const float x)
// --------------------------------------------------------------------------