Commit fa98a210 authored by Leonardo Solis
Browse files

added fences between turn-off write GA channels


Former-commit-id: 7dab5b18
parent da158740
......@@ -62,8 +62,10 @@ while(active) {
fixedpt phi;
fixedpt theta;
fixedpt genrotangle;
/*
fixedpt sin_theta, cos_theta;
fixedpt3 genrot_unitvec;
*/
fixedpt3 genotype_xyz;
fixedpt3 __attribute__ ((
memory,
......@@ -77,8 +79,10 @@ while(active) {
float phi;
float theta;
float genrotangle;
/*
float sin_theta, cos_theta;
float3 genrot_unitvec;
*/
float3 genotype_xyz;
float3 __attribute__ ((
memory,
......@@ -168,6 +172,7 @@ while(active) {
if (active == 0x00) {printf(" %-20s: %s\n", "Krnl_Conform", "must be disabled");}
#endif
/*
#if defined (FIXED_POINT_CONFORM)
sin_theta = fixedpt_sin(theta);
cos_theta = fixedpt_cos(theta);
......@@ -181,6 +186,7 @@ while(active) {
genrot_unitvec.y = sin_theta*native_sin(phi);
genrot_unitvec.z = cos_theta;
#endif
*/
for (ushort rotation_counter = 0; rotation_counter < DockConst_rotbondlist_length; rotation_counter++)
{
......@@ -220,6 +226,24 @@ while(active) {
if ((rotation_list_element & RLIST_GENROT_MASK) != 0) //if general rotation
{
#if defined (FIXED_POINT_CONFORM)
fixedpt sin_theta, cos_theta;
fixedpt3 genrot_unitvec;
sin_theta = fixedpt_sin(theta);
cos_theta = fixedpt_cos(theta);
genrot_unitvec.x = fixedpt_mul(sin_theta, fixedpt_cos(phi));
genrot_unitvec.y = fixedpt_mul(sin_theta, fixedpt_sin(phi));
genrot_unitvec.z = cos_theta;
#else
float sin_theta, cos_theta;
float3 genrot_unitvec;
sin_theta = native_sin(theta);
cos_theta = native_cos(theta);
genrot_unitvec.x = sin_theta*native_cos(phi);
genrot_unitvec.y = sin_theta*native_sin(phi);
genrot_unitvec.z = cos_theta;
#endif
rotation_unitvec = genrot_unitvec;
rotation_angle = genrotangle;
......
......@@ -609,6 +609,7 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
// ------------------------------------------------------------------
// Off: turn off all other kernels
// ------------------------------------------------------------------
/*
write_channel_altera(chan_GA2LS_Off1_active, false); // turn off LS_Arbiter, LS1
write_channel_altera(chan_GA2LS_Off2_active, false); // turn off LS2_Arbiter, LS2
write_channel_altera(chan_GA2LS_Off3_active, false); // turn off LS3_Arbiter, LS3
......@@ -623,7 +624,29 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
write_channel_altera(chan_Arbiter_LS3_float_off, false);
write_channel_altera(chan_IGLArbiter_Off, false); // turn off IGL_Arbiter, Conform, InterE, IntraE
*/
// turn off PRNG kernels
write_channel_altera(chan_Arbiter_BT_ushort_float_off, false);
write_channel_altera(chan_Arbiter_GG_uchar_off, false);
write_channel_altera(chan_Arbiter_GG_float_off, false);
write_channel_altera(chan_Arbiter_LS123_ushort_off, false);
write_channel_altera(chan_Arbiter_LS_float_off, false);
write_channel_altera(chan_Arbiter_LS2_float_off, false);
write_channel_altera(chan_Arbiter_LS3_float_off, false);
mem_fence(CLK_CHANNEL_MEM_FENCE);
// turn off LS kernels
write_channel_altera(chan_GA2LS_Off1_active, false);
write_channel_altera(chan_GA2LS_Off2_active, false);
write_channel_altera(chan_GA2LS_Off3_active, false);
mem_fence(CLK_CHANNEL_MEM_FENCE);
// turn off IGL, Conform, IE, IA
write_channel_altera(chan_IGLArbiter_Off, false);
mem_fence(CLK_CHANNEL_MEM_FENCE);
// write final pop & energies back to FPGA-board DDRs
for (ushort pop_cnt=0;pop_cnt<DockConst_pop_size; pop_cnt++) {
for (uchar gene_cnt=0; gene_cnt<DockConst_num_of_genes; gene_cnt++) {
......@@ -649,6 +672,7 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
printf(" %-20s: %s\n", "Krnl_GA", "disabled");
#endif
// write final evaluation and generation counts to FPGA-board DDRs
#if defined(SINGLE_COPY_POP_ENE)
GlobEvals_performed[Host_RunId] = eval_cnt;
GlobGens_performed [Host_RunId] = generation_cnt;
......
......@@ -1090,12 +1090,19 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit;
#if defined(SINGLE_COPY_POP_ENE)
memcopyBufferObjectToDevice(command_queue1,mem_dockpars_conformations_current, cpu_init_populations, size_populations);
#endif
printf("Docking runs to be executed: %u\n", mypars->num_of_runs);
printf("Execution run: ");
for (unsigned int run_cnt = 0; run_cnt < mypars->num_of_runs; run_cnt++)
{
/*
printf("Run %3u started ... \n", run_cnt+1);
fflush(stdout);
*/
printf(" %u", run_cnt+1);
fflush(stdout);
#if defined(SINGLE_COPY_POP_ENE)
......@@ -1593,8 +1600,9 @@ unsigned char Host_cons_limit = (unsigned char) dockpars.cons_limit;
} // End of for (run_cnt = 0; run_cnt < mypars->num_of_runs; run_cnt++)
printf("\n");
#if defined(SINGLE_COPY_POP_ENE)
......
......@@ -486,7 +486,20 @@ Non-instrumented hang on Sauron
Speedup vs i5 cpu core: 3ptb: 59/40 = 1.47x, 1stp: 84/77 = 1.09x
>>> commit "simplified IGL: handles only active&mode, but not data"
160. `Krnl_Conform`: reduced scope of sin_theta, cos_theta, genrot_unitvec (keeps II=10, and reduces estimated hw usage)
freq: 187.5 MHz (instrumented freq: 175 MHz)( 43 sec INSTRUMENTED on 10runs 3ptb, 81 sec on 1stp)
The non-instrumented version hangs on Sauron
Speedup vs i5 cpu core: 3ptb: 59/43 = 1.37x, 1stp: 84/81 = 1.03x
>>> NO COMMIT
161. `Krnl_GA`: added fences between the final channel writes that turn off the remaining kernels
162. `Krnl_GA`: improved the progress messages that report the number of executed runs
freq: 187 MHz (instrumented freq: 174 MHz)(40 sec INSTRUMENTED on 10runs 3ptb, 76 sec on 1stp)
Both the instrumented and non-instrumented versions run on Sauron
Speedup vs i5 cpu core: 3ptb: 59/40 = 1.47x, 1stp: 84/76 = 1.1x
>>> commit "added fences between turn-off write GA channels"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment