Commit 2b7760ca authored by Leonardo Solis's avatar Leonardo Solis

added 23_run_harp2

parent 6f75b1ad
......@@ -142,6 +142,7 @@ ENABLE_K2 = YES
ENABLE_K3 = YES
ENABLE_K4 = YES
# Prng kernels
ENABLE_K5 = YES
ENABLE_K6 = YES
ENABLE_K7 = YES
......@@ -150,6 +151,13 @@ ENABLE_K9 = YES
ENABLE_K10 = YES
ENABLE_K11 = YES
# LS kernels
ENABLE_K12 = YES
ENABLE_K13 = YES
#ENABLE_K14 = YES
#ENABLE_K15 = YES
ifeq ($(ENABLE_K1),YES)
K1 =-DENABLE_KERNEL1
else
......@@ -216,7 +224,32 @@ else
K11 =
endif
ENABLE_KERNELS = $(K1) $(K2) $(K3) $(K4) $(K5) $(K6) $(K7) $(K8) $(K9) $(K10) $(K11)
# Map each ENABLE_Kn switch onto the preprocessor flag -DENABLE_KERNELn.
# Kn is left empty unless the switch is exactly YES, so a switch that is
# unset or commented out (as ENABLE_K14 / ENABLE_K15 currently are above)
# disables the corresponding kernel.
ifeq ($(ENABLE_K12),YES)
K12 =-DENABLE_KERNEL12
else
K12 =
endif
ifeq ($(ENABLE_K13),YES)
K13 =-DENABLE_KERNEL13
else
K13 =
endif
# K14 and K15 are wired in already so that enabling them later only
# requires uncommenting their ENABLE_ switches.
ifeq ($(ENABLE_K14),YES)
K14 =-DENABLE_KERNEL14
else
K14 =
endif
ifeq ($(ENABLE_K15),YES)
K15 =-DENABLE_KERNEL15
else
K15 =
endif
ENABLE_KERNELS = $(K1) $(K2) $(K3) $(K4) $(K5) $(K6) $(K7) $(K8) $(K9) $(K10) $(K11) $(K12) $(K13) $(K14) $(K15)
# =============================
# Reproduce result (remove randomness)
......
......@@ -63,11 +63,7 @@ while(active) {
bool GG_valid = false;
bool LS_valid = false;
bool Off_valid = false;
/*
bool IC_active;
bool GG_active;
bool LS_active;
*/
float IC_active;
float GG_active;
float LS_active;
......@@ -81,7 +77,7 @@ while(active) {
(GG_valid == false) &&
(LS_valid == false) &&
*/
(Off_valid == false) && (pipe_cnt < DockConst_num_of_genes)
(Off_valid == false) && (pipe_cnt < DockConst_num_of_genes)
) {
IC_active = read_channel_nb_altera(chan_IC2Conf_genotype, &IC_valid);
GG_active = read_channel_nb_altera(chan_GG2Conf_genotype, &GG_valid);
......@@ -99,37 +95,23 @@ while(active) {
}
char mode;
/*
float genotype[ACTUAL_GENOTYPE_LENGTH];
*/
/*
active = (IC_valid) ? IC_active :
(GG_valid) ? GG_active :
(LS_valid) ? LS_active :
*/
//printf("LS_valid: %u, LS2_valid: %u\n", LS_valid, LS2_valid);
active = (IC_valid) ? true :
(GG_valid) ? true :
(LS_valid) ? true :
(Off_valid) ? Off_active :
false; // last case should never occur, otherwise above while would be still running
mode = (IC_valid) ? 0x01 :
(GG_valid) ? 0x02 :
(LS_valid) ? 0x03 :
(Off_valid) ? 0x05 :
0x05; // last case should never occur, otherwise above while would be still running
/*
for (uchar pipe_cnt=0; pipe_cnt<DockConst_num_of_genes; pipe_cnt++) {
genotype[pipe_cnt] = (IC_valid) ? read_channel_altera(chan_IC2Conf_genotype) :
(GG_valid) ? read_channel_altera(chan_GG2Conf_genotype) :
(LS_valid) ? read_channel_altera(chan_LS2Conf_genotype) :
(Off_valid) ? 0.0f:
0.0f; // last case should never occur, otherwise above while would be still running
}
*/
// --------------------------------------------------------------
//printf("AFTER In CONFORM CHANNEL\n");
/*
......@@ -317,6 +299,13 @@ while(active) {
printf("BEFORE Out CONFORM CHANNEL\n");
#endif
/*
if (mode == 0x04) {
printf("Krnl_Conform after loop\n");
}
*/
// --------------------------------------------------------------
// Send ligand atomic coordinates to channel
// --------------------------------------------------------------
......@@ -333,6 +322,11 @@ while(active) {
write_channel_altera(chan_Conf2Intere_xyz, loc_coords[pipe_cnt]);
write_channel_altera(chan_Conf2Intrae_xyz, loc_coords[pipe_cnt]);
}
/*
if (mode == 0x04) {
printf("Krnl_Conform sent\n");
}
*/
// --------------------------------------------------------------
#if defined (DEBUG_KRNL_CONFORM)
......
This diff is collapsed.
......@@ -276,7 +276,15 @@ while(active) {
case 0x03: // LS
write_channel_altera(chan_Intere2StoreLS_intere, interE);
break;
/*
case 0x04: // LS 2
write_channel_altera(chan_Intere2StoreLS_LS2_intere, interE);
break;
case 0x06: // LS 2
write_channel_altera(chan_Intere2StoreLS_LS2_intere, interE);
break;
*/
//case 5: // Off
// write_channel_altera(chan_Intere2StoreOff_intere, interE);
//break;
......
......@@ -164,10 +164,18 @@ while(active) {
write_channel_altera(chan_Intrae2StoreGG_intrae, intraE);
break;
case 0x03: // LS
case 0x03: // LS 1
write_channel_altera(chan_Intrae2StoreLS_intrae, intraE);
break;
/*
case 0x04: // LS 2
write_channel_altera(chan_Intrae2StoreLS_LS2_intrae, intraE);
break;
case 0x06: // LS 3
write_channel_altera(chan_Intrae2StoreLS_LS3_intrae, intraE);
break;
*/
//case 5: // Off
// write_channel_altera(chan_Intrae2StoreOff_intrae, intraE);
//break;
......
This diff is collapsed.
......@@ -3,8 +3,12 @@ channel bool chan_Arbiter_BT_float_active;
channel bool chan_Arbiter_GG_uchar_active;
channel bool chan_Arbiter_GG_float_active;
channel bool chan_Arbiter_LS_ushort_active;
channel bool chan_Arbiter_LS_float_active;
channel bool chan_Arbiter_LS_float_active;
/*
channel bool chan_Arbiter_LS2_float_active;
channel bool chan_Arbiter_LS3_float_active;
*/
// --------------------------------------------------------------------------
// --------------------------------------------------------------------------
__kernel __attribute__ ((max_global_work_dim(0)))
......@@ -19,6 +23,10 @@ while(active) {
bool GG_float_valid = false;
bool LS_ushort_valid = false;
bool LS_float_valid = false;
bool LS2_float_valid = false;
/*
bool LS3_float_valid = false;
*/
bool Off_valid = false;
bool BT_ushort_active;
......@@ -27,6 +35,10 @@ while(active) {
bool GG_float_active;
bool LS_ushort_active;
bool LS_float_active;
bool LS2_float_active;
/*
bool LS3_float_active;
*/
bool Off_active;
while((BT_ushort_valid == false) &&
......@@ -35,6 +47,10 @@ while(active) {
(GG_float_valid == false) &&
(LS_ushort_valid == false) &&
(LS_float_valid == false) &&
(LS2_float_valid == false) &&
/*
(LS3_float_valid == false) &&
*/
(Off_valid == false)
){
BT_ushort_active = read_channel_nb_altera(chan_GA2PRNG_BT_ushort_active, &BT_ushort_valid);
......@@ -43,6 +59,10 @@ while(active) {
GG_float_active = read_channel_nb_altera(chan_GA2PRNG_GG_float_active, &GG_float_valid);
LS_ushort_active = read_channel_nb_altera(chan_GA2PRNG_LS_ushort_active, &LS_ushort_valid);
LS_float_active = read_channel_nb_altera(chan_GA2PRNG_LS_float_active, &LS_float_valid);
/*
LS2_float_active = read_channel_nb_altera(chan_GA2PRNG_LS2_float_active, &LS2_float_valid);
LS3_float_active = read_channel_nb_altera(chan_GA2PRNG_LS3_float_active, &LS3_float_valid);
*/
Off_active = read_channel_nb_altera(chan_GA2PRNG_Off_active, &Off_valid);
}
......@@ -52,6 +72,10 @@ while(active) {
(GG_float_valid) ? GG_float_active :
(LS_ushort_valid)? LS_ushort_active :
(LS_float_valid) ? LS_float_active :
/*
(LS2_float_valid) ? LS2_float_active :
(LS3_float_valid) ? LS3_float_active :
*/
(Off_valid) ? Off_active :
false; // last case should never occur, otherwise above while would be still running
......@@ -78,7 +102,14 @@ while(active) {
if ((LS_float_valid == true) || (Off_valid == true)) {
write_channel_altera(chan_Arbiter_LS_float_active, active);
}
/*
if ((LS2_float_valid == true) || (Off_valid == true)) {
write_channel_altera(chan_Arbiter_LS2_float_active, active);
}
if ((LS_float_valid == true) || (Off_valid == true)) {
write_channel_altera(chan_Arbiter_LS_float_active, active);
}
*/
} // End of while(active)
}
......@@ -201,7 +232,9 @@ while(active) {
// --------------------------------------------------------------------------
__kernel __attribute__ ((max_global_work_dim(0)))
void Krnl_Prng_LS_ushort(const unsigned int seed,
const unsigned int pop_size){
const unsigned int pop_size,
const unsigned int num_of_lsentities
){
uint lfsr = seed;
bool active = true;
......@@ -210,10 +243,9 @@ while(active) {
//active = read_channel_altera(chan_GA2PRNG_LS_ushort_active);
active = read_channel_altera(chan_Arbiter_LS_ushort_active);
/*
#pragma unroll 1
for(uchar i=0; i<16; i++) {
*/
// num_of_lsentities is uint but it is often 6% of 300 ~ < 20 entities
// so indexing with uchar is enough
for(uchar i=0; i<num_of_lsentities; i++) {
ushort tmp;
uchar lsb;
lsb = lfsr & 0x01u;
......@@ -224,10 +256,8 @@ while(active) {
if(active) {
write_channel_altera(chan_PRNG2GA_LS_ushort_prng, tmp);
}
/*
}
*/
}
} // End of while(active)
}
......@@ -267,5 +297,38 @@ while(active) {
}
#if 0
// Krnl_Prng_LS2_float: single-work-item PRNG kernel that produces one float
// per loop iteration from a 32-bit shift register seeded with `seed`.
// The kernel is currently compiled out by the surrounding `#if 0`.
//
// seed: initial value of the shift register.
//
// Each iteration reads `active` from chan_Arbiter_LS2_float_active, advances
// the register, and writes the scaled value to chan_PRNG2GA_LS2_float_prng
// only when `active` is true. A false `active` ends the loop.
// NOTE(review): the only declaration of chan_Arbiter_LS2_float_active visible
// in this change is itself commented out -- confirm it is restored before
// re-enabling this kernel.
__kernel __attribute__ ((max_global_work_dim(0)))
void Krnl_Prng_LS2_float(const unsigned int seed
){
uint lfsr = seed;
bool active = true;
while(active) {
// Previously read directly from the GA; now the arbiter forwards the flag.
//active = read_channel_altera(chan_GA2PRNG_LS_float_active);
active = read_channel_altera(chan_Arbiter_LS2_float_active);
// Disabled inner loop (would have produced num_genes values per request).
/*
for(uchar i=0; i<num_genes; i++) {
*/
float tmp;
uchar lsb;
// Shift the register right by one; if the bit shifted out was 1,
// XOR the register with the mask 0xA3000000.
lsb = lfsr & 0x01u;
lfsr >>= 1;
lfsr ^= (-lsb) & 0xA3000000u;
// Scale the register value to a float.
// Presumably MAX_UINT is the largest uint, giving a result in [0, 1) -- TODO confirm.
tmp = (0.999999f/MAX_UINT)*lfsr;
// The register is advanced above even on the terminating (inactive)
// iteration; only the channel write is suppressed.
if(active) {
write_channel_altera(chan_PRNG2GA_LS2_float_prng, tmp);
}
/*
}
*/
} // End of while(active)
}
#endif
// --------------------------------------------------------------------------
// --------------------------------------------------------------------------
......@@ -1172,6 +1172,39 @@ After commenting BAD IDEA:
## `23_run_harp2`
Idea: replicate the LS while-loop, as it seems to be serial and unlikely to be optimized further.
But first, `LS` is separated and made into its own kernel.
* Prngs for `LS` are generated all in a sequence.
* Re-enabled `Krnl_LS` which executes only LS while-loop
* Added a kernel for enabling `LS` kernel
Notice that logic utilization is further reduced:
| Resource | Usage |
| :----------------------------------: | :----------: |
| Logic utilization | 78% |
| ALUTs | 32% |
| Dedicated logic registers | 46% |
| Memory blocks | 62% |
| DSP blocks | 29% |
Working frequency is 213.3MHz
### Execution time (s) measurements from non-instrumented program
| Configuration | FPGA | CPU (AutoDock) | Speed-up | Comments |
| :--------------: | :----------: | :--------------: | :-------: | :------------: |
| 3ptb, 10 runs | 252.25 | 59.49 | | ~4.24x slower |
### Execution time (s) measurements from instrumented program
| Configuration | FPGA | CPU (AutoDock) | Speed-up | Comments |
| :--------------: | :----------: | :--------------: | :-------: | :------------: |
| 3ptb, 10 runs | 262.11 | 59.49 | | ~4.41x slower |
......@@ -107,8 +107,34 @@ static cl_kernel kernel11 = NULL;
static const char *name_k11 = "Krnl_Prng_Arbiter";
#endif
// Host-side handles for kernels 12..15. Each enabled kernel gets its own
// command queue and kernel object; name_kN is the kernel name string passed
// to clCreateKernel() in init(). Every group is compiled only when the
// matching ENABLE_KERNELn macro is defined.
#ifdef ENABLE_KERNEL12
static cl_command_queue command_queue12 = NULL;
static cl_kernel kernel12 = NULL;
static const char *name_k12 = "Krnl_LS";
#endif
#ifdef ENABLE_KERNEL13
static cl_command_queue command_queue13 = NULL;
static cl_kernel kernel13 = NULL;
static const char *name_k13 = "Krnl_LS_Arbiter";
#endif
#ifdef ENABLE_KERNEL14
static cl_command_queue command_queue14 = NULL;
static cl_kernel kernel14 = NULL;
static const char *name_k14 = "Krnl_Prng_LS2_float";
#endif
#ifdef ENABLE_KERNEL15
static cl_command_queue command_queue15 = NULL;
static cl_kernel kernel15 = NULL;
static const char *name_k15 = "Krnl_LS2";
#endif
static cl_program program = NULL;
......@@ -316,7 +342,7 @@ filled with clock() */
size_prng_seeds = sizeof(unsigned int);
*/
size_prng_seeds = 6*sizeof(unsigned int);
size_prng_seeds = 8*sizeof(unsigned int);
cpu_prng_seeds = (unsigned int*) alignedMalloc(size_prng_seeds);
genseed(time(NULL)); //initializing seed generator
......@@ -447,12 +473,13 @@ filled with clock() */
setKernelArg(kernel1,9, sizeof(float), &dockpars.abs_max_dang);
setKernelArg(kernel1,10, sizeof(float), &dockpars.crossover_rate);
setKernelArg(kernel1,11, sizeof(unsigned int), &dockpars.num_of_lsentities);
setKernelArg(kernel1,12, sizeof(unsigned int), &dockpars.max_num_of_iters);
setKernelArg(kernel1,13, sizeof(float), &dockpars.rho_lower_bound);
setKernelArg(kernel1,14, sizeof(float), &dockpars.base_dmov_mul_sqrt3);
setKernelArg(kernel1,15, sizeof(unsigned int), &dockpars.num_of_genes);
setKernelArg(kernel1,16, sizeof(float), &dockpars.base_dang_mul_sqrt3);
setKernelArg(kernel1,17, sizeof(unsigned int), &dockpars.cons_limit);
//setKernelArg(kernel1,12, sizeof(unsigned int), &dockpars.max_num_of_iters);
//setKernelArg(kernel1,13, sizeof(float), &dockpars.rho_lower_bound);
//setKernelArg(kernel1,14, sizeof(float), &dockpars.base_dmov_mul_sqrt3);
//setKernelArg(kernel1,15, sizeof(unsigned int), &dockpars.num_of_genes);
setKernelArg(kernel1,12, sizeof(unsigned int), &dockpars.num_of_genes);
//setKernelArg(kernel1,16, sizeof(float), &dockpars.base_dang_mul_sqrt3);
//setKernelArg(kernel1,17, sizeof(unsigned int), &dockpars.cons_limit);
#endif // End of ENABLE_KERNEL1
#ifdef ENABLE_KERNEL2 // Krnl_Conform
......@@ -529,11 +556,36 @@ filled with clock() */
#ifdef ENABLE_KERNEL9 // Krnl_PRNG_LS_ushort
setKernelArg(kernel9,1, sizeof(unsigned int), &dockpars.pop_size);
setKernelArg(kernel9,2, sizeof(unsigned int), &dockpars.num_of_lsentities);
#endif // End of ENABLE_KERNEL9
#ifdef ENABLE_KERNEL10 // Krnl_PRNG_uchar
setKernelArg(kernel10,1, sizeof(unsigned int), &dockpars.num_of_genes);
#endif // End of ENABLE_KERNEL10
#endif // End of ENABLE_KERNEL10
// Kernel 11 has no args
#ifdef ENABLE_KERNEL12 // Krnl_LS
setKernelArg(kernel12,0, sizeof(unsigned int), &dockpars.max_num_of_iters);
setKernelArg(kernel12,1, sizeof(float), &dockpars.rho_lower_bound);
setKernelArg(kernel12,2, sizeof(float), &dockpars.base_dmov_mul_sqrt3);
setKernelArg(kernel12,3, sizeof(unsigned int), &dockpars.num_of_genes);
setKernelArg(kernel12,4, sizeof(float), &dockpars.base_dang_mul_sqrt3);
setKernelArg(kernel12,5, sizeof(unsigned int), &dockpars.cons_limit);
#endif // End of ENABLE_KERNEL12
#ifdef ENABLE_KERNEL13 // Krnl_LS_Arbiter
// Argument 0: number of genes.
setKernelArg(kernel13,0, sizeof(unsigned int), &dockpars.num_of_genes);
#endif // End of ENABLE_KERNEL13
#ifdef ENABLE_KERNEL15 // Krnl_LS2
setKernelArg(kernel15,0, sizeof(unsigned int), &dockpars.max_num_of_iters);
setKernelArg(kernel15,1, sizeof(float), &dockpars.rho_lower_bound);
setKernelArg(kernel15,2, sizeof(float), &dockpars.base_dmov_mul_sqrt3);
setKernelArg(kernel15,3, sizeof(unsigned int), &dockpars.num_of_genes);
setKernelArg(kernel15,4, sizeof(float), &dockpars.base_dang_mul_sqrt3);
setKernelArg(kernel15,5, sizeof(unsigned int), &dockpars.cons_limit);
#endif // End of ENABLE_KERNEL15
for (unsigned int run_cnt = 0; run_cnt < mypars->num_of_runs; run_cnt++)
{
......@@ -558,6 +610,7 @@ filled with clock() */
cpu_prng_seeds[3] = genseed(0u);
cpu_prng_seeds[4] = genseed(0u);
cpu_prng_seeds[5] = genseed(0u);
cpu_prng_seeds[6] = genseed(0u);
#endif
......@@ -602,6 +655,10 @@ filled with clock() */
#ifdef ENABLE_KERNEL10 // Krnl_PRNG_uchar
setKernelArg(kernel10,0, sizeof(unsigned int), &cpu_prng_seeds[5]);
#endif // End of ENABLE_KERNEL10
#ifdef ENABLE_KERNEL14 // Krnl_PRNG_LS2_float
// Argument 0: PRNG seed, taken from slot 6 of the host seed array.
setKernelArg(kernel14,0, sizeof(unsigned int), &cpu_prng_seeds[6]);
#endif // End of ENABLE_KERNEL14
#ifdef ENABLE_KERNEL1
runKernelTask(command_queue1,kernel1,NULL,NULL);
......@@ -645,7 +702,23 @@ filled with clock() */
#ifdef ENABLE_KERNEL11
runKernelTask(command_queue11,kernel11,NULL,NULL);
#endif // ENABLE_KERNEL10
#endif // ENABLE_KERNEL10
#ifdef ENABLE_KERNEL12
runKernelTask(command_queue12,kernel12,NULL,NULL);
#endif // ENABLE_KERNEL12
#ifdef ENABLE_KERNEL13
runKernelTask(command_queue13,kernel13,NULL,NULL);
#endif // ENABLE_KERNEL13
#ifdef ENABLE_KERNEL14
runKernelTask(command_queue14,kernel14,NULL,NULL);
#endif // ENABLE_KERNEL14
#ifdef ENABLE_KERNEL15
// Launch kernel15 as a task on its dedicated queue.
runKernelTask(command_queue15,kernel15,NULL,NULL);
#endif // ENABLE_KERNEL15
#ifdef ENABLE_KERNEL1
......@@ -690,6 +763,22 @@ filled with clock() */
#ifdef ENABLE_KERNEL11
clFinish(command_queue11);
#endif
#ifdef ENABLE_KERNEL12
clFinish(command_queue12);
#endif
#ifdef ENABLE_KERNEL13
clFinish(command_queue13);
#endif
#ifdef ENABLE_KERNEL14
clFinish(command_queue14);
#endif
#ifdef ENABLE_KERNEL15
clFinish(command_queue15);
#endif
clock_stop_docking = clock();
......@@ -1024,6 +1113,34 @@ bool init() {
kernel11 = clCreateKernel(program, name_k11, &status);
checkError(status, "Failed to create kernel");
#endif
#ifdef ENABLE_KERNEL12
command_queue12 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status);
checkError(status, "Failed to create command queue12");
kernel12 = clCreateKernel(program, name_k12, &status);
checkError(status, "Failed to create kernel");
#endif
#ifdef ENABLE_KERNEL13
command_queue13 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status);
checkError(status, "Failed to create command queue13");
kernel13 = clCreateKernel(program, name_k13, &status);
checkError(status, "Failed to create kernel");
#endif
#ifdef ENABLE_KERNEL14
// Create the dedicated command queue and kernel object for kernel 14.
// The queue error message now names queue14 (it previously said queue13,
// a copy-paste slip that would misdirect debugging).
command_queue14 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status);
checkError(status, "Failed to create command queue14");
kernel14 = clCreateKernel(program, name_k14, &status);
checkError(status, "Failed to create kernel");
#endif
#endif
#ifdef ENABLE_KERNEL15
// Create the dedicated command queue and kernel object for kernel 15.
// The queue error message now names queue15 (it previously said queue13,
// a copy-paste slip that would misdirect debugging).
command_queue15 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status);
checkError(status, "Failed to create command queue15");
kernel15 = clCreateKernel(program, name_k15, &status);
checkError(status, "Failed to create kernel");
#endif
#endif
return true;
}
......@@ -1083,6 +1200,26 @@ void cleanup() {
#ifdef ENABLE_KERNEL11
if(kernel11) {clReleaseKernel(kernel11);}
if(command_queue11) {clReleaseCommandQueue(command_queue11);}
#endif
#ifdef ENABLE_KERNEL12
if(kernel12) {clReleaseKernel(kernel12);}
if(command_queue12) {clReleaseCommandQueue(command_queue12);}
#endif
#ifdef ENABLE_KERNEL13
if(kernel13) {clReleaseKernel(kernel13);}
if(command_queue13) {clReleaseCommandQueue(command_queue13);}
#endif
#ifdef ENABLE_KERNEL14
if(kernel14) {clReleaseKernel(kernel14);}
if(command_queue14) {clReleaseCommandQueue(command_queue14);}
#endif
#ifdef ENABLE_KERNEL15
if(kernel15) {clReleaseKernel(kernel15);}
if(command_queue15) {clReleaseCommandQueue(command_queue15);}
#endif
if(program) {clReleaseProgram(program);}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment