Commit 969ae67c authored by Leonardo Solis
Browse files

replaced nb with blocking & deeper channels in GA

parent 100fc4c6
......@@ -44,8 +44,8 @@ channel char2 chan_Conf2Intrae_actmode;
channel char chan_Conf2Intrae_actmode;
// Send energy values from InterE & IntraE to genotype-senders (IC, GG, LSs)
channel float chan_Intere2StoreIC_intere __attribute__((depth(2)));
channel float chan_Intere2StoreGG_intere __attribute__((depth(2)));
channel float chan_Intere2StoreIC_intere __attribute__((depth(/*2*/MAX_POPSIZE)));
channel float chan_Intere2StoreGG_intere __attribute__((depth(/*2*/MAX_POPSIZE)));
/*
channel float chan_Intere2StoreLS_LS1_intere __attribute__((depth(2)));
channel float chan_Intere2StoreLS_LS2_intere __attribute__((depth(2)));
......@@ -57,8 +57,8 @@ channel float chan_Intere2StoreLS_LS7_intere __attribute__((depth(2)));
channel float chan_Intere2StoreLS_LS8_intere __attribute__((depth(2)));
channel float chan_Intere2StoreLS_LS9_intere __attribute__((depth(2)));
*/
channel float chan_Intrae2StoreIC_intrae __attribute__((depth(2)));
channel float chan_Intrae2StoreGG_intrae __attribute__((depth(2)));
channel float chan_Intrae2StoreIC_intrae __attribute__((depth(/*2*/MAX_POPSIZE)));
channel float chan_Intrae2StoreGG_intrae __attribute__((depth(/*2*/MAX_POPSIZE)));
/*
channel float chan_Intrae2StoreLS_LS1_intrae __attribute__((depth(2)));
channel float chan_Intrae2StoreLS_LS2_intrae __attribute__((depth(2)));
......@@ -371,7 +371,7 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
#endif
// Read energy
///*
/*
float energyIA_IC_rx;
float energyIE_IC_rx;
bool intra_valid = false;
......@@ -384,11 +384,13 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
energyIE_IC_rx = read_channel_nb_altera(chan_Intere2StoreIC_intere, &inter_valid);
}
}
//*/
/*
*/
///*
mem_fence(CLK_CHANNEL_MEM_FENCE);
float energyIA_IC_rx = read_channel_altera(chan_Intrae2StoreIC_intrae);
float energyIE_IC_rx = read_channel_altera(chan_Intere2StoreIC_intere);
*/
mem_fence(CLK_CHANNEL_MEM_FENCE);
//*/
LocalEneCurr[pop_cnt] = energyIA_IC_rx + energyIE_IC_rx;
#if defined (DEBUG_KRNL_IC)
......@@ -397,6 +399,9 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
}
// ------------------------------------------------------------------
//printf("\nIC loop finished!");
uint eval_cnt = DockConst_pop_size; // takes into account the IC evals
uint generation_cnt = 0;
......@@ -611,7 +616,7 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
#endif
// Read energy
///*
/*
float energyIA_GG_rx;
float energyIE_GG_rx;
bool intra_valid = false;
......@@ -624,11 +629,13 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
energyIE_GG_rx = read_channel_nb_altera(chan_Intere2StoreGG_intere, &inter_valid);
}
}
//*/
/*
*/
///*
mem_fence(CLK_CHANNEL_MEM_FENCE);
float energyIA_GG_rx = read_channel_altera(chan_Intrae2StoreGG_intrae);
float energyIE_GG_rx = read_channel_altera(chan_Intere2StoreGG_intere);
*/
mem_fence(CLK_CHANNEL_MEM_FENCE);
//*/
LocalEneNext[new_pop_cnt] = energyIA_GG_rx + energyIE_GG_rx;
#if defined (DEBUG_KRNL_GG)
......
  • The circuit of this commit (only-ga + smoothing + simplified channels igl2conf + blocking & deeper channels in GA) is stable during execution on sauron and, as expected, is faster than the full circuit (DC4b (FPL/FSP), i.e., the one including Solis-Wets).

    The best energies are not expected to be as good as those of the full circuit, and neither is the cluster size (not shown). The important part of the following table is the execution times, because this commit might improve the performance of the full circuit.

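    |      | DC4b (FPL/FSP) | This commit |
    |------|----------------|-------------|
    | Freq | 187.5 MHz | 173.6 MHz |
    |      | Best E (kcal/mol), Time (sec) | Best E (kcal/mol), Time (sec) |
    | 3ptb | -5.53, 211 | -5.28, 119 |
    | 1stp | -7.76, 385 | -6.67, 199 |
    | 4hmg | -4.11, 623 | -1.35, 384 |
    | 3ce3 | -10.88, 1077 | -7.21, 808 |
    | 3c1x | -12.61, 1487 | +1.76, 1236 |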
    Edited by Leonardo Solis
  • Removing the last mem_fence(CLK_CHANNEL_MEM_FENCE) after the two consecutive blocking reads, in both IC and GG, causes a deadlock on the FPGA.

    Therefore, the last mem_fence(CLK_CHANNEL_MEM_FENCE) was re-enabled.

  • If that last mem_fence(CLK_CHANNEL_MEM_FENCE) is placed between the two channel reads (instead of after both reads), the circuit no longer hangs and is even faster. See commit c313d04c.

    Edited by Leonardo Solis
  • mentioned in commit c313d04c

    Toggle commit list
  • mentioned in commit 3f211e8f

    Toggle commit list
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment