Commit d8a7ddab authored by Leonardo Solis
Browse files

reduced scope ls_eval_cnt

parent d9099aa5
......@@ -25,6 +25,9 @@ ofdock_taskpar_alt/docking.aocx
*~
final_population_run*
# Ignore initial dynamic profile results
ofdock_taskpar_alt/dyn_profile/*_run*
# ===================
# C gitignore
# https://github.com/github/gitignore/blob/master/C.gitignore
......
......@@ -7,20 +7,24 @@
//OFF: turn off
#include "../defines.h"
// Channel depth for the float8 atom-coordinate channels
// (chan_Conf2Intere_xyz, chan_Conf2Intrae_xyz): half of MAX_NUM_OF_ATOMS.
#define CHAN_DEPTH_ATOMXYZ (MAX_NUM_OF_ATOMS/2)
// Channel depth shared by the float genotype channels; equal to
// ACTUAL_GENOTYPE_LENGTH (defined outside this view).
// NOTE(review): the project changelog records that smaller depths
// (4 for PRNG channels, 2 for GA2LS genotype channels) reduced performance,
// so the original depth was restored.
#define CHAN_DEPTH_GENOTYPE ACTUAL_GENOTYPE_LENGTH
channel bool chan_GA2IGL_IC_active;
channel bool chan_GA2IGL_GG_active;
channel float chan_IC2Conf_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_GG2Conf_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_LS2Conf_LS1_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_LS2Conf_LS2_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_LS2Conf_LS3_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_IC2Conf_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel float chan_GG2Conf_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel float chan_LS2Conf_LS1_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel float chan_LS2Conf_LS2_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel float chan_LS2Conf_LS3_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
// IC, GG, LS1
channel float8 chan_Conf2Intere_xyz __attribute__((depth(MAX_NUM_OF_ATOMS/2)));
// Conform to IE, IA
channel float8 chan_Conf2Intere_xyz __attribute__((depth(CHAN_DEPTH_ATOMXYZ)));
channel char2 chan_Conf2Intere_actmode;
channel float8 chan_Conf2Intrae_xyz __attribute__((depth(MAX_NUM_OF_ATOMS/2)));
channel float8 chan_Conf2Intrae_xyz __attribute__((depth(CHAN_DEPTH_ATOMXYZ)));
channel char2 chan_Conf2Intrae_actmode;
// Send data back to generators of genotypes
......@@ -40,13 +44,13 @@ channel float chan_Intrae2StoreLS_LS3_intrae __attribute__((depth(2)));
channel float8 chan_PRNG2GA_BT_ushort_float_prng;
channel uchar2 chan_PRNG2GA_GG_uchar_prng;
channel float chan_PRNG2GA_GG_float_prng __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_PRNG2GA_GG_float_prng __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel ushort3 chan_PRNG2GA_LS123_ushort_prng;
channel float chan_PRNG2GA_LS_float_prng __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_PRNG2GA_LS2_float_prng __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_PRNG2GA_LS3_float_prng __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_PRNG2GA_LS_float_prng __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel float chan_PRNG2GA_LS2_float_prng __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel float chan_PRNG2GA_LS3_float_prng __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel bool chan_Arbiter_BT_ushort_float_off;
channel bool chan_Arbiter_GG_uchar_off;
......@@ -62,22 +66,29 @@ channel bool chan_GA2PRNG_LS_float_Off;
channel float chan_GA2LS_LS1_energy;
channel float chan_GA2LS_LS2_energy;
channel float chan_GA2LS_LS3_energy;
channel float chan_GA2LS_LS1_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_GA2LS_LS2_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_GA2LS_LS3_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_GA2LS_LS1_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel float chan_GA2LS_LS2_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel float chan_GA2LS_LS3_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel bool chan_LS2Arbiter_LS1_end;
channel bool chan_LS2Arbiter_LS2_end;
channel bool chan_LS2Arbiter_LS3_end;
channel float2 chan_LS2GA_LS1_evalenergy __attribute__((depth(2)));
channel float2 chan_LS2GA_LS2_evalenergy __attribute__((depth(2)));
channel float2 chan_LS2GA_LS3_evalenergy __attribute__((depth(2)));
channel float chan_LS2GA_LS1_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_LS2GA_LS2_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_LS2GA_LS3_genotype __attribute__((depth(ACTUAL_GENOTYPE_LENGTH)));
channel float chan_LS2GA_LS1_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel float chan_LS2GA_LS2_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel float chan_LS2GA_LS3_genotype __attribute__((depth(CHAN_DEPTH_GENOTYPE)));
channel bool chan_GA2LS_Off1_active;
channel bool chan_GA2LS_Off2_active;
channel bool chan_GA2LS_Off3_active;
channel bool chan_IGLArbiter_Off;
// IGL_Arbiter -> Conform
channel char2 chan_IGL2Conform_actmode __attribute__((depth(3))); // active, mode
channel float chan_IGL2Conform_genotype __attribute__((depth(3*CHAN_DEPTH_GENOTYPE )));
channel bool chan_IGLArbiter_Off;
#if defined (FIXED_POINT_CONFORM) || (FIXED_POINT_LS1) || defined (FIXED_POINT_LS2) || defined (FIXED_POINT_LS3)
#include "../defines_fixedpt.h"
......@@ -255,7 +266,9 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
// ------------------------------------------------------------------
uint eval_cnt = DockConst_pop_size; // takes into account the IC evals
/*
uint ls_eval_cnt = 0;
*/
uint generation_cnt = 0;
while ((eval_cnt < DockConst_num_of_energy_evals) && (generation_cnt < DockConst_num_of_generations)) {
......@@ -493,7 +506,10 @@ void Krnl_GA(__global float* restrict GlobPopulationCurrent,
// LS2
// LS3
// ------------------------------------------------------------------
/*
ls_eval_cnt = 0;
*/
uint ls_eval_cnt = 0;
#pragma ivdep
//for (ushort ls_ent_cnt=0; ls_ent_cnt<DockConst_num_of_lsentities; ls_ent_cnt++) {
......
// Output channels IGL_Arbiter -> Conform
channel char2 chan_IGL2Conform_actmode __attribute__((depth(3))); // active, mode
channel float chan_IGL2Conform_genotype __attribute__((depth(3*ACTUAL_GENOTYPE_LENGTH)));
// --------------------------------------------------------------------------
// --------------------------------------------------------------------------
__kernel __attribute__ ((max_global_work_dim(0)))
......
channel bool chan_LS2Arbiter_LS1_end;
// --------------------------------------------------------------------------
// --------------------------------------------------------------------------
__kernel __attribute__ ((max_global_work_dim(0)))
void Krnl_LS(
//unsigned int DockConst_max_num_of_iters,
unsigned short DockConst_max_num_of_iters,
#if defined (FIXED_POINT_LS1)
fixedpt DockConst_rho_lower_bound,
......@@ -21,17 +18,9 @@ void Krnl_LS(
float DockConst_base_dang_mul_sqrt3,
#endif
//unsigned int DockConst_cons_limit
unsigned char DockConst_cons_limit
unsigned char DockConst_cons_limit
)
{
/*
#if defined (FIXED_POINT_LS1)
__local fixedpt genotype [ACTUAL_GENOTYPE_LENGTH];
#else
__local float genotype [ACTUAL_GENOTYPE_LENGTH];
#endif
*/
bool valid = true;
/*
// added to find out which fixed-point precision is needed
......@@ -79,7 +68,7 @@ while(valid) {
#if defined (FIXED_POINT_LS1)
fixedpt genotype [ACTUAL_GENOTYPE_LENGTH];
#else
float genotype [ACTUAL_GENOTYPE_LENGTH];
float genotype [ACTUAL_GENOTYPE_LENGTH];
#endif
for (uchar i=0; i<DockConst_num_of_genes; i++) {
......@@ -254,10 +243,6 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (positive_direction == true) ? deviate_plus_bias [i] :
deviate_minus_bias [i] ;
genotype [i] = entity_possible_new_genotype [i];
......@@ -274,10 +259,6 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (iteration_cnt == 1)? 0: (genotype_bias [i] >> 1);
}
......@@ -295,13 +276,8 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (positive_direction == true) ? deviate_plus_bias [i] :
deviate_minus_bias [i] ;
genotype [i] = entity_possible_new_genotype [i];
}
......@@ -315,11 +291,6 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (iteration_cnt == 1)? 0.0f: (0.5f*genotype_bias [i]);
}
......@@ -340,13 +311,6 @@ while(valid) {
// write back data to GA
for (uchar i=0; i<DockConst_num_of_genes; i++) {
if (i == 0) {
/*
write_channel_altera(chan_LS2GA_LS1_eval, LS_eval);
mem_fence(CLK_CHANNEL_MEM_FENCE);
write_channel_altera(chan_LS2GA_LS1_energy, current_energy);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float2 evalenergy = {*(float*)&LS_eval, current_energy};
write_channel_altera(chan_LS2GA_LS1_evalenergy, evalenergy);
}
......
channel bool chan_LS2Arbiter_LS2_end;
// --------------------------------------------------------------------------
// --------------------------------------------------------------------------
__kernel __attribute__ ((max_global_work_dim(0)))
void Krnl_LS2(
//unsigned int DockConst_max_num_of_iters,
unsigned short DockConst_max_num_of_iters,
#if defined (FIXED_POINT_LS2)
fixedpt DockConst_rho_lower_bound,
......@@ -20,18 +17,9 @@ void Krnl_LS2(
float DockConst_base_dang_mul_sqrt3,
#endif
//unsigned int DockConst_cons_limit
unsigned char DockConst_cons_limit
unsigned char DockConst_cons_limit
)
{
/*
#if defined (FIXED_POINT_LS2)
__local fixedpt genotype [ACTUAL_GENOTYPE_LENGTH];
#else
__local float genotype [ACTUAL_GENOTYPE_LENGTH];
#endif
*/
bool valid = true;
while(valid) {
......@@ -53,7 +41,7 @@ while(valid) {
#if defined (FIXED_POINT_LS2)
fixedpt genotype [ACTUAL_GENOTYPE_LENGTH];
#else
float genotype [ACTUAL_GENOTYPE_LENGTH];
float genotype [ACTUAL_GENOTYPE_LENGTH];
#endif
for (uchar i=0; i<DockConst_num_of_genes; i++) {
......@@ -236,11 +224,6 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (positive_direction == true) ? deviate_plus_bias [i]:
deviate_minus_bias [i];
......@@ -258,11 +241,6 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (iteration_cnt == 1)? 0: (genotype_bias [i] >> 1);
}
......@@ -279,15 +257,8 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (positive_direction == true) ? deviate_plus_bias [i] :
deviate_minus_bias [i] ;
genotype [i] = entity_possible_new_genotype [i];
}
......@@ -301,11 +272,6 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (iteration_cnt == 1)? 0.0f: (0.5f*genotype_bias [i]);
}
......@@ -326,13 +292,6 @@ while(valid) {
// write back data to GA
for (uchar i=0; i<DockConst_num_of_genes; i++) {
if (i == 0) {
/*
write_channel_altera(chan_LS2GA_LS2_eval, LS_eval);
mem_fence(CLK_CHANNEL_MEM_FENCE);
write_channel_altera(chan_LS2GA_LS2_energy, current_energy);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float2 evalenergy = {*(float*)&LS_eval, current_energy};
write_channel_altera(chan_LS2GA_LS2_evalenergy, evalenergy);
}
......
channel bool chan_LS2Arbiter_LS3_end;
// --------------------------------------------------------------------------
// --------------------------------------------------------------------------
__kernel __attribute__ ((max_global_work_dim(0)))
void Krnl_LS3(
//unsigned int DockConst_max_num_of_iters,
unsigned short DockConst_max_num_of_iters,
#if defined (FIXED_POINT_LS3)
fixedpt DockConst_rho_lower_bound,
......@@ -20,18 +17,9 @@ void Krnl_LS3(
float DockConst_base_dang_mul_sqrt3,
#endif
//unsigned int DockConst_cons_limit
unsigned char DockConst_cons_limit
unsigned char DockConst_cons_limit
)
{
/*
#if defined (FIXED_POINT_LS3)
__local fixedpt genotype [ACTUAL_GENOTYPE_LENGTH];
#else
__local float genotype [ACTUAL_GENOTYPE_LENGTH];
#endif
*/
bool valid = true;
while(valid) {
......@@ -53,7 +41,7 @@ while(valid) {
#if defined (FIXED_POINT_LS3)
fixedpt genotype [ACTUAL_GENOTYPE_LENGTH];
#else
float genotype [ACTUAL_GENOTYPE_LENGTH];
float genotype [ACTUAL_GENOTYPE_LENGTH];
#endif
for (uchar i=0; i<DockConst_num_of_genes; i++) {
......@@ -236,11 +224,6 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (positive_direction == true) ? deviate_plus_bias [i] :
deviate_minus_bias [i];
genotype [i] = entity_possible_new_genotype [i];
......@@ -257,11 +240,6 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (iteration_cnt == 1)? 0: (genotype_bias [i] >> 1);
}
......@@ -279,14 +257,8 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (positive_direction == true) ? deviate_plus_bias [i]:
deviate_minus_bias [i];
genotype [i] = entity_possible_new_genotype [i];
}
......@@ -300,11 +272,6 @@ while(valid) {
//#pragma unroll 16
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
#pragma unroll
for (uchar i=0; i<ACTUAL_GENOTYPE_LENGTH; i++) {
*/
genotype_bias [i] = (iteration_cnt == 1)? 0.0f: (0.5f*genotype_bias [i]);
}
......@@ -325,13 +292,6 @@ while(valid) {
// write back data to GA
for (uchar i=0; i<DockConst_num_of_genes; i++) {
if (i == 0) {
/*
write_channel_altera(chan_LS2GA_LS3_eval, LS_eval);
mem_fence(CLK_CHANNEL_MEM_FENCE);
write_channel_altera(chan_LS2GA_LS3_energy, current_energy);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float2 evalenergy = {*(float*)&LS_eval, current_energy};
write_channel_altera(chan_LS2GA_LS3_evalenergy, evalenergy);
}
......
......@@ -460,24 +460,25 @@ First achieving speedup vs i5 cpu core: 3ptb: 59/43 = 1.37x, 1stp: 84/81 = 1.03x
>>> commit "set __constant to 12KB + aoc fp flags"
XXX, Between Conform and InterE, IntraE create a wider channel:
https://www.alteraforum.com/forum/showthread.php?t=55979
155. `Krnl_LS`, `Krnl_LS2`,`Krnl_LS3`: cleaned up + moved `chan_LS2Arbiter_LSX_end` channels to `Krnl_GA`
156. `Krnl_IGL_Arbiter`: moved channels to `Krnl_GA`
157. `Krnl_GA`: create #defines for channel depths: CHAN_DEPTH_ATOMXYZ and CHAN_DEPTH_GENOTYPE
. `Krnl_GA`: reduce CHAN_DEPTH_GENOTYPE depth for PRNG channels from ACTUAL_GENOTYPE_LENGTH to 4
. `Krnl_GA`: reduce CHAN_DEPTH_GENOTYPE depth for GA2LS genotype channels from ACTUAL_GENOTYPE_LENGTH to 2
-> switched back to the original depth because performance dropped (48 sec on 3ptb and 87 sec on 1stp for 10 runs)
158. `Krnl_GA`: reduce scope of ls_eval_cnt
freq: 178 MHz (43 sec non-instrumented on 10 runs of 3ptb, 81 sec on 1stp) (instrumented freq: 174 MHz)
Same speedup as previous commit
>>> commit "reduced scope ls_eval_cnt"
XXX, Between Conform and InterE, IntraE create a wider channel:
https://www.alteraforum.com/forum/showthread.php?t=55979
NOT DONE YET
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment